diff --git a/404.html b/404.html
index 5ec9cfd56..730f28844 100644
--- a/404.html
+++ b/404.html
@@ -9,7 +9,7 @@
       
       
       <link rel="icon" href="/pheval/assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/CODE_OF_CONDUCT/index.html b/CODE_OF_CONDUCT/index.html
index 9ff42f4ac..0e5f406c5 100644
--- a/CODE_OF_CONDUCT/index.html
+++ b/CODE_OF_CONDUCT/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/CODE_OF_CONDUCT/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/about/index.html b/about/index.html
index 3b634a095..342cd9da5 100644
--- a/about/index.html
+++ b/about/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/about/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/api/pheval/analyse/analysis/index.html b/api/pheval/analyse/analysis/index.html
index 62fdc1872..f0da5df64 100644
--- a/api/pheval/analyse/analysis/index.html
+++ b/api/pheval/analyse/analysis/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/analysis/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -491,21 +491,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.analysis" class="md-nav__link">
-    src.pheval.analyse.analysis
+    analysis
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.analysis.benchmark_directory" class="md-nav__link">
-    benchmark_directory()
+    benchmark_directory
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.analysis.benchmark_run_comparisons" class="md-nav__link">
-    benchmark_run_comparisons()
+    benchmark_run_comparisons
   </a>
   
 </li>
@@ -1221,21 +1221,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.analysis" class="md-nav__link">
-    src.pheval.analyse.analysis
+    analysis
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.analysis.benchmark_directory" class="md-nav__link">
-    benchmark_directory()
+    benchmark_directory
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.analysis.benchmark_run_comparisons" class="md-nav__link">
-    benchmark_run_comparisons()
+    benchmark_run_comparisons
   </a>
   
 </li>
@@ -1265,6 +1265,7 @@ <h1>Analysis</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.analysis"></a>
   <div class="doc doc-contents first">
 
@@ -1280,12 +1281,13 @@ <h1>Analysis</h1>
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">benchmark_directory</span><span class="p">(</span><span class="n">results_dir_and_input</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">output_prefix</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">gene_analysis</span><span class="p">,</span> <span class="n">variant_analysis</span><span class="p">,</span> <span class="n">disease_analysis</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">benchmark_directory</span><span class="p">(</span><span class="n">results_dir_and_input</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">output_prefix</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">gene_analysis</span><span class="p">,</span> <span class="n">variant_analysis</span><span class="p">,</span> <span class="n">disease_analysis</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1294,6 +1296,8 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
   
       <p>Benchmark prioritisation performance for a single run.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1310,7 +1314,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
           <td>
                 <code><span title="pheval.analyse.run_data_parser.TrackInputOutputDirectories">TrackInputOutputDirectories</span></code>
           </td>
-          <td><p>Input and output directories for tracking results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Input and output directories for tracking results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1320,7 +1328,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
           <td>
                 <code>str</code>
           </td>
-          <td><p>The order in which scores are arranged, this can be either ascending or descending.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The order in which scores are arranged, this can be either ascending or descending.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1330,7 +1342,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
           <td>
                 <code>str</code>
           </td>
-          <td><p>Prefix for the benchmark output file names.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Prefix for the benchmark output file names.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1340,7 +1356,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
           <td>
                 <code>float</code>
           </td>
-          <td><p>The threshold for benchmark evaluation.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The threshold for benchmark evaluation.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1350,7 +1370,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Boolean flag indicating whether to benchmark gene results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Boolean flag indicating whether to benchmark gene results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1360,7 +1384,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Boolean flag indicating whether to benchmark variant results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Boolean flag indicating whether to benchmark variant results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1370,7 +1398,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Boolean flag indicating whether to benchmark disease results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Boolean flag indicating whether to benchmark disease results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1380,7 +1412,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
           <td>
                 <code>str</code>
           </td>
-          <td><p>Type of plot for benchmark visualisation.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Type of plot for benchmark visualisation.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1388,9 +1424,9 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 53</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 53</span>
 <span class="normal"> 54</span>
 <span class="normal"> 55</span>
 <span class="normal"> 56</span>
@@ -1490,17 +1526,18 @@ <h2 id="src.pheval.analyse.analysis.benchmark_directory" class="doc doc-heading"
             <span class="n">benchmark_generator</span><span class="o">=</span><span class="n">DiseaseBenchmarkRunOutputGenerator</span><span class="p">(),</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">benchmark_run_comparisons</span><span class="p">(</span><span class="n">results_directories</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">output_prefix</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">gene_analysis</span><span class="p">,</span> <span class="n">variant_analysis</span><span class="p">,</span> <span class="n">disease_analysis</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">benchmark_run_comparisons</span><span class="p">(</span><span class="n">results_directories</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">output_prefix</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">gene_analysis</span><span class="p">,</span> <span class="n">variant_analysis</span><span class="p">,</span> <span class="n">disease_analysis</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1509,6 +1546,8 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
   
       <p>Benchmark prioritisation performance for several runs.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1525,7 +1564,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.analyse.run_data_parser.TrackInputOutputDirectories">TrackInputOutputDirectories</span>]</code>
           </td>
-          <td><p>Input and output directories for tracking results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Input and output directories for tracking results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1535,7 +1578,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
           <td>
                 <code>str</code>
           </td>
-          <td><p>The order in which scores are arranged, this can be either ascending or descending.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The order in which scores are arranged, this can be either ascending or descending.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1545,7 +1592,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
           <td>
                 <code>str</code>
           </td>
-          <td><p>Prefix for the benchmark output file names.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Prefix for the benchmark output file names.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1555,7 +1606,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
           <td>
                 <code>float</code>
           </td>
-          <td><p>The threshold for benchmark evaluation.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The threshold for benchmark evaluation.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1565,7 +1620,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Boolean flag indicating whether to benchmark gene results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Boolean flag indicating whether to benchmark gene results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1575,7 +1634,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Boolean flag indicating whether to benchmark variant results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Boolean flag indicating whether to benchmark variant results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1585,7 +1648,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Boolean flag indicating whether to benchmark disease results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Boolean flag indicating whether to benchmark disease results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1595,7 +1662,11 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
           <td>
                 <code>str</code>
           </td>
-          <td><p>Type of plot for benchmark visualisation.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Type of plot for benchmark visualisation.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1603,9 +1674,9 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">144</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">144</span>
 <span class="normal">145</span>
 <span class="normal">146</span>
 <span class="normal">147</span>
@@ -1705,7 +1776,7 @@ <h2 id="src.pheval.analyse.analysis.benchmark_run_comparisons" class="doc doc-he
             <span class="n">benchmark_generator</span><span class="o">=</span><span class="n">DiseaseBenchmarkRunOutputGenerator</span><span class="p">(),</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/analyse/benchmark_generator/index.html b/api/pheval/analyse/benchmark_generator/index.html
index 3a6fd255e..5d5b7993f 100644
--- a/api/pheval/analyse/benchmark_generator/index.html
+++ b/api/pheval/analyse/benchmark_generator/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/benchmark_generator/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -505,7 +505,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.benchmark_generator" class="md-nav__link">
-    src.pheval.analyse.benchmark_generator
+    benchmark_generator
   </a>
   
 </li>
@@ -1235,7 +1235,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.benchmark_generator" class="md-nav__link">
-    src.pheval.analyse.benchmark_generator
+    benchmark_generator
   </a>
   
 </li>
@@ -1293,6 +1293,7 @@ <h1>Benchmark generator</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.benchmark_generator"></a>
   <div class="doc doc-contents first">
 
@@ -1312,7 +1313,7 @@ <h1>Benchmark generator</h1>
 
 
 <h2 id="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" class="doc doc-heading">
-        <code>BenchmarkRunOutputGenerator</code>
+          <code>BenchmarkRunOutputGenerator</code>
 
   
   <span class="doc doc-labels">
@@ -1327,6 +1328,8 @@ <h2 id="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" clas
   
       <p>Base class for recording data required for generating benchmarking outputs.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1338,42 +1341,57 @@ <h2 id="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" clas
     </thead>
     <tbody>
         <tr>
-          <td><code>prioritisation_type_file_prefix</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator.prioritisation_type_file_prefix">prioritisation_type_file_prefix</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Prefix for the prioritisation type output file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Prefix for the prioritisation type output file.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>y_label</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator.y_label">y_label</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Label for the y-axis in benchmarking outputs.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Label for the y-axis in benchmarking outputs.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>generate_benchmark_run_results</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator.generate_benchmark_run_results">generate_benchmark_run_results</span></code></td>
           <td>
                 <code><span title="typing.Callable">Callable</span></code>
           </td>
-          <td><p>Callable to generate benchmark run results.
+          <td>
+            <div class="doc-md-description">
+              <p>Callable to generate benchmark run results.
 Takes parameters: input and results directory, score order, threshold, rank comparison,
-and returns BenchmarkRunResults.</p></td>
+and returns BenchmarkRunResults.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>stats_comparison_file_suffix</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator.stats_comparison_file_suffix">stats_comparison_file_suffix</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Suffix for the rank comparison file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Suffix for the rank comparison file.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/benchmark_generator.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">20</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/benchmark_generator.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">20</span>
 <span class="normal">21</span>
 <span class="normal">22</span>
 <span class="normal">23</span>
@@ -1411,7 +1429,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" clas
     <span class="p">]</span>
     <span class="n">stats_comparison_file_suffix</span><span class="p">:</span> <span class="nb">str</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1431,6 +1449,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" clas
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1438,7 +1457,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" clas
 
 
 <h2 id="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerator" class="doc doc-heading">
-        <code>DiseaseBenchmarkRunOutputGenerator</code>
+          <code>DiseaseBenchmarkRunOutputGenerator</code>
 
   
   <span class="doc doc-labels">
@@ -1449,8 +1468,8 @@ <h2 id="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerato
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" href="#src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</a></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" href="#src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</a></code></p>
 
   
       <p>Subclass of BenchmarkRunOutputGenerator specialised
@@ -1458,6 +1477,8 @@ <h2 id="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerato
 <p>This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes
 specifically for disease prioritisation benchmarking.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1469,46 +1490,61 @@ <h2 id="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerato
     </thead>
     <tbody>
         <tr>
-          <td><code>prioritisation_type_file_prefix</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerator.prioritisation_type_file_prefix">prioritisation_type_file_prefix</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Prefix for the disease prioritisation type file.
-Defaults to DISEASE_PLOT_FILE_PREFIX.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Prefix for the disease prioritisation type file.
+Defaults to DISEASE_PLOT_FILE_PREFIX.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>y_label</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerator.y_label">y_label</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Label for the y-axis in disease prioritisation benchmarking outputs.
-Defaults to DISEASE_PLOT_Y_LABEL.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Label for the y-axis in disease prioritisation benchmarking outputs.
+Defaults to DISEASE_PLOT_Y_LABEL.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>generate_benchmark_run_results</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerator.generate_benchmark_run_results">generate_benchmark_run_results</span></code></td>
           <td>
                 <code><span title="typing.Callable">Callable</span></code>
           </td>
-          <td><p>Callable to generate disease prioritisation
+          <td>
+            <div class="doc-md-description">
+              <p>Callable to generate disease prioritisation
 benchmark run results. Defaults to benchmark_disease_prioritisation.
 Takes parameters: input and results directory, score order, threshold, rank comparison,
-and returns BenchmarkRunResults.</p></td>
+and returns BenchmarkRunResults.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>stats_comparison_file_suffix</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerator.stats_comparison_file_suffix">stats_comparison_file_suffix</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Suffix for the disease rank comparison file.
-Defaults to "-disease_summary.tsv".</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Suffix for the disease rank comparison file.
+Defaults to "-disease_summary.tsv".</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/benchmark_generator.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">102</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/benchmark_generator.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">102</span>
 <span class="normal">103</span>
 <span class="normal">104</span>
 <span class="normal">105</span>
@@ -1564,7 +1600,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerato
     <span class="p">]</span> <span class="o">=</span> <span class="n">benchmark_disease_prioritisation</span>
     <span class="n">stats_comparison_file_suffix</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;-disease_summary.tsv&quot;</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1584,6 +1620,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerato
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1591,7 +1628,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerato
 
 
 <h2 id="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator" class="doc doc-heading">
-        <code>GeneBenchmarkRunOutputGenerator</code>
+          <code>GeneBenchmarkRunOutputGenerator</code>
 
   
   <span class="doc doc-labels">
@@ -1602,8 +1639,8 @@ <h2 id="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator"
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" href="#src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</a></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" href="#src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</a></code></p>
 
   
       <p>Subclass of BenchmarkRunOutputGenerator specialised
@@ -1611,6 +1648,8 @@ <h2 id="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator"
 <p>This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes
 specifically for gene prioritisation benchmarking.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1622,46 +1661,61 @@ <h2 id="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator"
     </thead>
     <tbody>
         <tr>
-          <td><code>prioritisation_type_file_prefix</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator.prioritisation_type_file_prefix">prioritisation_type_file_prefix</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Prefix for the gene prioritisation type file.
-Defaults to GENE_PLOT_FILE_PREFIX.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Prefix for the gene prioritisation type file.
+Defaults to GENE_PLOT_FILE_PREFIX.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>y_label</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator.y_label">y_label</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Label for the y-axis in gene prioritisation benchmarking outputs.
-Defaults to GENE_PLOT_Y_LABEL.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Label for the y-axis in gene prioritisation benchmarking outputs.
+Defaults to GENE_PLOT_Y_LABEL.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>generate_benchmark_run_results</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator.generate_benchmark_run_results">generate_benchmark_run_results</span></code></td>
           <td>
                 <code><span title="typing.Callable">Callable</span></code>
           </td>
-          <td><p>Callable to generate gene prioritisation
+          <td>
+            <div class="doc-md-description">
+              <p>Callable to generate gene prioritisation
 benchmark run results. Defaults to benchmark_gene_prioritisation.
 Takes parameters: input and results directory, score order, threshold, rank comparison,
-and returns BenchmarkRunResults.</p></td>
+and returns BenchmarkRunResults.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>stats_comparison_file_suffix</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator.stats_comparison_file_suffix">stats_comparison_file_suffix</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Suffix for the gene rank comparison file.
-Defaults to "-gene_summary.tsv".</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Suffix for the gene rank comparison file.
+Defaults to "-gene_summary.tsv".</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/benchmark_generator.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">41</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/benchmark_generator.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">41</span>
 <span class="normal">42</span>
 <span class="normal">43</span>
 <span class="normal">44</span>
@@ -1717,7 +1771,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator"
     <span class="p">]</span> <span class="o">=</span> <span class="n">benchmark_gene_prioritisation</span>
     <span class="n">stats_comparison_file_suffix</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;-gene_summary.tsv&quot;</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1737,6 +1791,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator"
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1744,7 +1799,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator"
 
 
 <h2 id="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerator" class="doc doc-heading">
-        <code>VariantBenchmarkRunOutputGenerator</code>
+          <code>VariantBenchmarkRunOutputGenerator</code>
 
   
   <span class="doc doc-labels">
@@ -1755,8 +1810,8 @@ <h2 id="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerato
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" href="#src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</a></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator" href="#src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</a></code></p>
 
   
       <p>Subclass of BenchmarkRunOutputGenerator specialised
@@ -1764,6 +1819,8 @@ <h2 id="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerato
 <p>This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes
 specifically for variant prioritisation benchmarking.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1775,46 +1832,61 @@ <h2 id="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerato
     </thead>
     <tbody>
         <tr>
-          <td><code>prioritisation_type_file_prefix</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerator.prioritisation_type_file_prefix">prioritisation_type_file_prefix</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Prefix for the variant prioritisation type file.
-Defaults to VARIANT_PLOT_FILE_PREFIX.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Prefix for the variant prioritisation type file.
+Defaults to VARIANT_PLOT_FILE_PREFIX.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>y_label</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerator.y_label">y_label</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Label for the y-axis in variant prioritisation benchmarking outputs.
-Defaults to VARIANT_PLOT_Y_LABEL.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Label for the y-axis in variant prioritisation benchmarking outputs.
+Defaults to VARIANT_PLOT_Y_LABEL.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>generate_benchmark_run_results</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerator.generate_benchmark_run_results">generate_benchmark_run_results</span></code></td>
           <td>
                 <code><span title="typing.Callable">Callable</span></code>
           </td>
-          <td><p>Callable to generate variant prioritisation
+          <td>
+            <div class="doc-md-description">
+              <p>Callable to generate variant prioritisation
 benchmark run results. Defaults to benchmark_variant_prioritisation.
 Takes parameters: input and results directory, score order, threshold, rank comparison,
-and returns BenchmarkRunResults.</p></td>
+and returns BenchmarkRunResults.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>stats_comparison_file_suffix</code></td>
+          <td><code><span title="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerator.stats_comparison_file_suffix">stats_comparison_file_suffix</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Suffix for the variant rank comparison file.
-Defaults to "-variant_summary.tsv".</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Suffix for the variant rank comparison file.
+Defaults to "-variant_summary.tsv".</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/benchmark_generator.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">71</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/benchmark_generator.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">71</span>
 <span class="normal">72</span>
 <span class="normal">73</span>
 <span class="normal">74</span>
@@ -1872,7 +1944,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerato
     <span class="p">]</span> <span class="o">=</span> <span class="n">benchmark_variant_prioritisation</span>
     <span class="n">stats_comparison_file_suffix</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;-variant_summary.tsv&quot;</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1892,6 +1964,7 @@ <h2 id="src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerato
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/analyse/benchmarking_data/index.html b/api/pheval/analyse/benchmarking_data/index.html
index d3f624ed4..3377b1384 100644
--- a/api/pheval/analyse/benchmarking_data/index.html
+++ b/api/pheval/analyse/benchmarking_data/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/benchmarking_data/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -519,7 +519,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.benchmarking_data" class="md-nav__link">
-    src.pheval.analyse.benchmarking_data
+    benchmarking_data
   </a>
   
 </li>
@@ -1214,7 +1214,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.benchmarking_data" class="md-nav__link">
-    src.pheval.analyse.benchmarking_data
+    benchmarking_data
   </a>
   
 </li>
@@ -1251,6 +1251,7 @@ <h1>Benchmarking data</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.benchmarking_data"></a>
   <div class="doc doc-contents first">
 
@@ -1270,7 +1271,7 @@ <h1>Benchmarking data</h1>
 
 
 <h2 id="src.pheval.analyse.benchmarking_data.BenchmarkRunResults" class="doc doc-heading">
-        <code>BenchmarkRunResults</code>
+          <code>BenchmarkRunResults</code>
 
   
   <span class="doc doc-labels">
@@ -1285,6 +1286,8 @@ <h2 id="src.pheval.analyse.benchmarking_data.BenchmarkRunResults" class="doc doc
   
       <p>Benchmarking results for a run.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1296,40 +1299,55 @@ <h2 id="src.pheval.analyse.benchmarking_data.BenchmarkRunResults" class="doc doc
     </thead>
     <tbody>
         <tr>
-          <td><code>ranks</code></td>
+          <td><code><span title="src.pheval.analyse.benchmarking_data.BenchmarkRunResults.ranks">ranks</span></code></td>
           <td>
                 <code>dict</code>
           </td>
-          <td><p>Dictionary containing recorded ranks for samples.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Dictionary containing recorded ranks for samples.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>rank_stats</code></td>
+          <td><code><span title="src.pheval.analyse.benchmarking_data.BenchmarkRunResults.rank_stats">rank_stats</span></code></td>
           <td>
                 <code><span title="pheval.analyse.rank_stats.RankStats">RankStats</span></code>
           </td>
-          <td><p>Statistics related to benchmark.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Statistics related to benchmark.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>results_dir</code></td>
+          <td><code><span title="src.pheval.analyse.benchmarking_data.BenchmarkRunResults.results_dir">results_dir</span></code></td>
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the result directory. Defaults to None.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the result directory. Defaults to None.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>benchmark_name</code></td>
+          <td><code><span title="src.pheval.analyse.benchmarking_data.BenchmarkRunResults.benchmark_name">benchmark_name</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Name of the benchmark run. Defaults to None.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Name of the benchmark run. Defaults to None.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/benchmarking_data.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 8</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/benchmarking_data.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 8</span>
 <span class="normal"> 9</span>
 <span class="normal">10</span>
 <span class="normal">11</span>
@@ -1363,7 +1381,7 @@ <h2 id="src.pheval.analyse.benchmarking_data.BenchmarkRunResults" class="doc doc
     <span class="n">results_dir</span><span class="p">:</span> <span class="n">Path</span> <span class="o">=</span> <span class="kc">None</span>
     <span class="n">benchmark_name</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="kc">None</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1383,6 +1401,7 @@ <h2 id="src.pheval.analyse.benchmarking_data.BenchmarkRunResults" class="doc doc
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/analyse/binary_classification_stats/index.html b/api/pheval/analyse/binary_classification_stats/index.html
index 44e5c75c0..b1fd7b64e 100644
--- a/api/pheval/analyse/binary_classification_stats/index.html
+++ b/api/pheval/analyse/binary_classification_stats/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/binary_classification_stats/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -533,7 +533,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats" class="md-nav__link">
-    src.pheval.analyse.binary_classification_stats
+    binary_classification_stats
   </a>
   
 </li>
@@ -548,105 +548,105 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.accuracy" class="md-nav__link">
-    accuracy()
+    accuracy
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification" class="md-nav__link">
-    add_classification()
+    add_classification
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_known_entities" class="md-nav__link">
-    add_classification_for_known_entities()
+    add_classification_for_known_entities
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_other_entities" class="md-nav__link">
-    add_classification_for_other_entities()
+    add_classification_for_other_entities
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_labels_and_scores" class="md-nav__link">
-    add_labels_and_scores()
+    add_labels_and_scores
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.f1_score" class="md-nav__link">
-    f1_score()
+    f1_score
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_discovery_rate" class="md-nav__link">
-    false_discovery_rate()
+    false_discovery_rate
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_negative_rate" class="md-nav__link">
-    false_negative_rate()
+    false_negative_rate
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_positive_rate" class="md-nav__link">
-    false_positive_rate()
+    false_positive_rate
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.matthews_correlation_coefficient" class="md-nav__link">
-    matthews_correlation_coefficient()
+    matthews_correlation_coefficient
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.negative_predictive_value" class="md-nav__link">
-    negative_predictive_value()
+    negative_predictive_value
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.precision" class="md-nav__link">
-    precision()
+    precision
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.remove_relevant_ranks" class="md-nav__link">
-    remove_relevant_ranks()
+    remove_relevant_ranks
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.sensitivity" class="md-nav__link">
-    sensitivity()
+    sensitivity
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.specificity" class="md-nav__link">
-    specificity()
+    specificity
   </a>
   
 </li>
@@ -1325,7 +1325,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats" class="md-nav__link">
-    src.pheval.analyse.binary_classification_stats
+    binary_classification_stats
   </a>
   
 </li>
@@ -1340,105 +1340,105 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.accuracy" class="md-nav__link">
-    accuracy()
+    accuracy
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification" class="md-nav__link">
-    add_classification()
+    add_classification
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_known_entities" class="md-nav__link">
-    add_classification_for_known_entities()
+    add_classification_for_known_entities
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_other_entities" class="md-nav__link">
-    add_classification_for_other_entities()
+    add_classification_for_other_entities
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_labels_and_scores" class="md-nav__link">
-    add_labels_and_scores()
+    add_labels_and_scores
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.f1_score" class="md-nav__link">
-    f1_score()
+    f1_score
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_discovery_rate" class="md-nav__link">
-    false_discovery_rate()
+    false_discovery_rate
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_negative_rate" class="md-nav__link">
-    false_negative_rate()
+    false_negative_rate
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_positive_rate" class="md-nav__link">
-    false_positive_rate()
+    false_positive_rate
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.matthews_correlation_coefficient" class="md-nav__link">
-    matthews_correlation_coefficient()
+    matthews_correlation_coefficient
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.negative_predictive_value" class="md-nav__link">
-    negative_predictive_value()
+    negative_predictive_value
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.precision" class="md-nav__link">
-    precision()
+    precision
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.remove_relevant_ranks" class="md-nav__link">
-    remove_relevant_ranks()
+    remove_relevant_ranks
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.sensitivity" class="md-nav__link">
-    sensitivity()
+    sensitivity
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.specificity" class="md-nav__link">
-    specificity()
+    specificity
   </a>
   
 </li>
@@ -1473,6 +1473,7 @@ <h1>Binary classification stats</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.binary_classification_stats"></a>
   <div class="doc doc-contents first">
 
@@ -1492,7 +1493,7 @@ <h1>Binary classification stats</h1>
 
 
 <h2 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats" class="doc doc-heading">
-        <code>BinaryClassificationStats</code>
+          <code>BinaryClassificationStats</code>
 
   
   <span class="doc doc-labels">
@@ -1507,6 +1508,8 @@ <h2 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
   
       <p>A data class representing counts of different categories in binary classification.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1518,44 +1521,59 @@ <h2 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
     </thead>
     <tbody>
         <tr>
-          <td><code>true_positives</code></td>
+          <td><code><span title="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.true_positives">true_positives</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>The count of true positive instances - i.e., the number of known entities
-ranked 1 in the results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The count of true positive instances - i.e., the number of known entities
+ranked 1 in the results.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>true_negatives</code></td>
+          <td><code><span title="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.true_negatives">true_negatives</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>The count of true negative instances - i.e., the number of non-relevant entities
-ranked at a position other than 1 in the results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The count of true negative instances - i.e., the number of non-relevant entities
+ranked at a position other than 1 in the results.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>false_positives</code></td>
+          <td><code><span title="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_positives">false_positives</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>The count of false positive instances - i.e., the number of non-relevant entities
-ranked at position 1 in the results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The count of false positive instances - i.e., the number of non-relevant entities
+ranked at position 1 in the results.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>false_negatives</code></td>
+          <td><code><span title="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_negatives">false_negatives</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>The count of false negative instances - i.e., the number of known entities
-ranked at a position other than 1 in the results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The count of false negative instances - i.e., the number of known entities
+ranked at a position other than 1 in the results.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 12</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 12</span>
 <span class="normal"> 13</span>
 <span class="normal"> 14</span>
 <span class="normal"> 15</span>
@@ -2191,7 +2209,7 @@ <h2 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
             <span class="k">else</span> <span class="mf">0.0</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2205,12 +2223,13 @@ <h2 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.accuracy" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">accuracy</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">accuracy</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2220,6 +2239,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
       <p>Calculate Accuracy.</p>
 <p>Accuracy measures the proportion of correctly predicted instances out of all instances.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2233,26 +2254,38 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The Accuracy of the model, calculated as the sum of true positives and true negatives divided by</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The Accuracy of the model, calculated as the sum of true positives and true negatives divided by</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>the sum of true positives, false positives, true negatives, and false negatives.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>the sum of true positives, false positives, true negatives, and false negatives.</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>Returns 0.0 if the total sum of counts is zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Returns 0.0 if the total sum of counts is zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">250</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">250</span>
 <span class="normal">251</span>
 <span class="normal">252</span>
 <span class="normal">253</span>
@@ -2308,63 +2341,34 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">add_classification</span><span class="p">(</span><span class="n">pheval_results</span><span class="p">,</span> <span class="n">relevant_ranks</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">add_classification</span><span class="p">(</span><span class="n">pheval_results</span><span class="p">,</span> <span class="n">relevant_ranks</span><span class="p">)</span></code>
 
 </h3>
 
 
   <div class="doc doc-contents ">
   
-      <p>Update binary classification metrics for known and unknown entities based on their ranks.</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>pheval_results</code></td>
-          <td>
-                <code><span title="typing.Union">Union</span>[<span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.RankedPhEvalGeneResult">RankedPhEvalGeneResult</span>], <span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.RankedPhEvalVariantResult">RankedPhEvalVariantResult</span>], <span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.RankedPhEvalDiseaseResult">RankedPhEvalDiseaseResult</span>]]</code>
-          </td>
-          <td><p>(Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]):
-    The list of all pheval results.</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>relevant_ranks</code></td>
-          <td>
-                <code><span title="typing.List">List</span>[int]</code>
-          </td>
-          <td><p>A list of the ranks associated with the known entities.</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
+      <p>Update binary classification metrics for known and unknown entities based on their ranks.
+Args:
+    pheval_results:
+        (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]):
+            The list of all pheval results.
+    relevant_ranks (List[int]): A list of the ranks associated with the known entities.</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">114</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">114</span>
 <span class="normal">115</span>
 <span class="normal">116</span>
 <span class="normal">117</span>
@@ -2408,17 +2412,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
     <span class="p">)</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">add_labels_and_scores</span><span class="p">(</span><span class="n">pheval_results</span><span class="p">,</span> <span class="n">relevant_ranks</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_known_entities" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">add_classification_for_known_entities</span><span class="p">(</span><span class="n">relevant_ranks</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">add_classification_for_known_entities</span><span class="p">(</span><span class="n">relevant_ranks</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2427,6 +2432,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
   
       <p>Update binary classification metrics for known entities based on their ranking.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2443,7 +2450,11 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
           <td>
                 <code><span title="typing.List">List</span>[int]</code>
           </td>
-          <td><p>A list of the ranks associated with the known entities.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A list of the ranks associated with the known entities.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2451,9 +2462,9 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">63</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">63</span>
 <span class="normal">64</span>
 <span class="normal">65</span>
 <span class="normal">66</span>
@@ -2477,17 +2488,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">elif</span> <span class="n">rank</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
             <span class="bp">self</span><span class="o">.</span><span class="n">false_negatives</span> <span class="o">+=</span> <span class="mi">1</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_other_entities" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">add_classification_for_other_entities</span><span class="p">(</span><span class="n">ranks</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">add_classification_for_other_entities</span><span class="p">(</span><span class="n">ranks</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2496,6 +2508,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
   
       <p>Update binary classification metrics for other entities based on their ranking.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2512,7 +2526,11 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
           <td>
                 <code><span title="typing.List">List</span>[int]</code>
           </td>
-          <td><p>A list of the ranks for all other entities.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A list of the ranks for all other entities.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2520,9 +2538,9 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">76</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">76</span>
 <span class="normal">77</span>
 <span class="normal">78</span>
 <span class="normal">79</span>
@@ -2546,17 +2564,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">elif</span> <span class="n">rank</span> <span class="o">!=</span> <span class="mi">1</span><span class="p">:</span>
             <span class="bp">self</span><span class="o">.</span><span class="n">true_negatives</span> <span class="o">+=</span> <span class="mi">1</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_labels_and_scores" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">add_labels_and_scores</span><span class="p">(</span><span class="n">pheval_results</span><span class="p">,</span> <span class="n">relevant_ranks</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">add_labels_and_scores</span><span class="p">(</span><span class="n">pheval_results</span><span class="p">,</span> <span class="n">relevant_ranks</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2565,6 +2584,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
   
       <p>Adds scores and labels from the PhEval results.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2581,7 +2602,11 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
           <td>
                 <code><span title="typing.List">List</span>[int]</code>
           </td>
-          <td><p>A list of the ranks associated with the known entities.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A list of the ranks associated with the known entities.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2589,9 +2614,9 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 89</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 89</span>
 <span class="normal"> 90</span>
 <span class="normal"> 91</span>
 <span class="normal"> 92</span>
@@ -2639,17 +2664,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="bp">self</span><span class="o">.</span><span class="n">labels</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">label</span><span class="p">)</span>
         <span class="n">relevant_ranks_copy</span><span class="o">.</span><span class="n">remove</span><span class="p">(</span><span class="n">result</span><span class="o">.</span><span class="n">rank</span><span class="p">)</span> <span class="k">if</span> <span class="n">label</span> <span class="o">==</span> <span class="mi">1</span> <span class="k">else</span> <span class="kc">None</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.f1_score" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">f1_score</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">f1_score</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2660,6 +2686,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <p>F1 Score is the harmonic mean of precision and recall, providing a balance between false positives
 and false negatives.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2673,20 +2701,28 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN).</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>Returns 0.0 if the denominator is zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Returns 0.0 if the denominator is zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">279</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">279</span>
 <span class="normal">280</span>
 <span class="normal">281</span>
 <span class="normal">282</span>
@@ -2720,17 +2756,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_discovery_rate" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">false_discovery_rate</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">false_discovery_rate</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2740,6 +2777,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
       <p>Calculate False Discovery Rate (FDR).</p>
 <p>FDR measures the proportion of instances predicted as positive that are actually negative.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2753,20 +2792,28 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The False Discovery Rate of the model, calculated as false positives divided by the sum of</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The False Discovery Rate of the model, calculated as false positives divided by the sum of</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>false positives and true positives. Returns 0.0 if both false positives and true positives are zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>false positives and true positives. Returns 0.0 if both false positives and true positives are zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">218</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">218</span>
 <span class="normal">219</span>
 <span class="normal">220</span>
 <span class="normal">221</span>
@@ -2796,17 +2843,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_negative_rate" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">false_negative_rate</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">false_negative_rate</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2816,6 +2864,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
       <p>Calculate False Negative Rate (FNR).</p>
 <p>FNR measures the proportion of instances that are actually positive but predicted as negative.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2829,20 +2879,28 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The False Negative Rate of the model, calculated as false negatives divided by the sum of</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The False Negative Rate of the model, calculated as false negatives divided by the sum of</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">234</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">234</span>
 <span class="normal">235</span>
 <span class="normal">236</span>
 <span class="normal">237</span>
@@ -2872,17 +2930,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_positive_rate" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">false_positive_rate</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">false_positive_rate</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2892,6 +2951,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
       <p>Calculate False Positive Rate (FPR).</p>
 <p>FPR measures the proportion of instances predicted as positive that are actually negative.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2905,20 +2966,28 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The False Positive Rate of the model, calculated as false positives divided by the sum of</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The False Positive Rate of the model, calculated as false positives divided by the sum of</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">202</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">202</span>
 <span class="normal">203</span>
 <span class="normal">204</span>
 <span class="normal">205</span>
@@ -2948,17 +3017,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.matthews_correlation_coefficient" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">matthews_correlation_coefficient</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">matthews_correlation_coefficient</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2968,6 +3038,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
       <p>Calculate Matthews Correlation Coefficient (MCC).</p>
 <p>MCC is a measure of the quality of binary classifications, accounting for imbalances in the data.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2981,26 +3053,38 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The Matthews Correlation Coefficient of the model, calculated as</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The Matthews Correlation Coefficient of the model, calculated as</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)).</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>Returns 0.0 if the denominator is zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Returns 0.0 if the denominator is zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">297</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">297</span>
 <span class="normal">298</span>
 <span class="normal">299</span>
 <span class="normal">300</span>
@@ -3066,17 +3150,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.negative_predictive_value" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">negative_predictive_value</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">negative_predictive_value</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3086,6 +3171,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
       <p>Calculate Negative Predictive Value (NPV).</p>
 <p>NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3099,20 +3186,28 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The Negative Predictive Value of the model, calculated as true negatives divided by the sum of</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The Negative Predictive Value of the model, calculated as true negatives divided by the sum of</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">186</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">186</span>
 <span class="normal">187</span>
 <span class="normal">188</span>
 <span class="normal">189</span>
@@ -3142,17 +3237,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.precision" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">precision</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">precision</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3163,6 +3259,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <p>Precision measures the proportion of correctly predicted positive instances out of all instances
 predicted as positive.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3176,20 +3274,28 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The precision of the model, calculated as true positives divided by the sum of true positives</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The precision of the model, calculated as true positives divided by the sum of true positives</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>and false positives. Returns 0.0 if both true positives and false positives are zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>and false positives. Returns 0.0 if both true positives and false positives are zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">169</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">169</span>
 <span class="normal">170</span>
 <span class="normal">171</span>
 <span class="normal">172</span>
@@ -3221,17 +3327,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.remove_relevant_ranks" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">remove_relevant_ranks</span><span class="p">(</span><span class="n">pheval_results</span><span class="p">,</span> <span class="n">relevant_ranks</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">remove_relevant_ranks</span><span class="p">(</span><span class="n">pheval_results</span><span class="p">,</span> <span class="n">relevant_ranks</span><span class="p">)</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
@@ -3242,42 +3349,14 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 
   <div class="doc doc-contents ">
   
-      <p>Remove the relevant entity ranks from all result ranks</p>
+      <p>Remove the relevant entity ranks from all result ranks
+Args:
+    pheval_results:
+        (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]):
+            The list of all pheval results.
+    relevant_ranks (List[int]): A list of the ranks associated with the known entities.</p>
+
 
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>pheval_results</code></td>
-          <td>
-                <code><span title="typing.Union">Union</span>[<span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.RankedPhEvalGeneResult">RankedPhEvalGeneResult</span>], <span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.RankedPhEvalVariantResult">RankedPhEvalVariantResult</span>], <span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.RankedPhEvalDiseaseResult">RankedPhEvalDiseaseResult</span>]]</code>
-          </td>
-          <td><p>(Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]):
-    The list of all pheval results.</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>relevant_ranks</code></td>
-          <td>
-                <code><span title="typing.List">List</span>[int]</code>
-          </td>
-          <td><p>A list of the ranks associated with the known entities.</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
 
   <p><strong>Returns:</strong></p>
   <table>
@@ -3292,14 +3371,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
           <td>
                 <code><span title="typing.List">List</span>[int]</code>
           </td>
-          <td><p>List[int]: A list of the ranks with the relevant entity ranks removed.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[int]: A list of the ranks with the relevant entity ranks removed.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">35</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">35</span>
 <span class="normal">36</span>
 <span class="normal">37</span>
 <span class="normal">38</span>
@@ -3353,17 +3436,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
             <span class="k">continue</span>
     <span class="k">return</span> <span class="n">all_result_ranks</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.sensitivity" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">sensitivity</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">sensitivity</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3373,6 +3457,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
       <p>Calculate sensitivity.</p>
 <p>Sensitivity measures the proportion of actual positive instances correctly identified by the model.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3386,20 +3472,28 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The sensitivity of the model, calculated as true positives divided by the sum of true positives</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The sensitivity of the model, calculated as true positives divided by the sum of true positives</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>and false negatives. Returns 0 if both true positives and false negatives are zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>and false negatives. Returns 0 if both true positives and false negatives are zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">137</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">137</span>
 <span class="normal">138</span>
 <span class="normal">139</span>
 <span class="normal">140</span>
@@ -3429,17 +3523,18 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.specificity" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">specificity</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">specificity</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3449,6 +3544,8 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
       <p>Calculate specificity.</p>
 <p>Specificity measures the proportion of actual negative instances correctly identified by the model.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3462,20 +3559,28 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The specificity of the model, calculated as true negatives divided by the sum of true negatives</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The specificity of the model, calculated as true negatives divided by the sum of true negatives</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>and false positives. Returns 0.0 if both true negatives and false positives are zero.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>and false positives. Returns 0.0 if both true negatives and false positives are zero.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">153</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/binary_classification_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">153</span>
 <span class="normal">154</span>
 <span class="normal">155</span>
 <span class="normal">156</span>
@@ -3505,7 +3610,7 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
         <span class="k">else</span> <span class="mf">0.0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -3516,6 +3621,7 @@ <h3 id="src.pheval.analyse.binary_classification_stats.BinaryClassificationStats
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/analyse/disease_prioritisation_analysis/index.html b/api/pheval/analyse/disease_prioritisation_analysis/index.html
index 048ec5a3b..6a3857a73 100644
--- a/api/pheval/analyse/disease_prioritisation_analysis/index.html
+++ b/api/pheval/analyse/disease_prioritisation_analysis/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/disease_prioritisation_analysis/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -547,7 +547,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis" class="md-nav__link">
-    src.pheval.analyse.disease_prioritisation_analysis
+    disease_prioritisation_analysis
   </a>
   
 </li>
@@ -562,14 +562,14 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.assess_disease_prioritisation" class="md-nav__link">
-    assess_disease_prioritisation()
+    assess_disease_prioritisation
   </a>
   
 </li>
@@ -581,14 +581,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_disease_prioritisation" class="md-nav__link">
-    assess_phenopacket_disease_prioritisation()
+    assess_phenopacket_disease_prioritisation
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_prioritisation" class="md-nav__link">
-    benchmark_disease_prioritisation()
+    benchmark_disease_prioritisation
   </a>
   
 </li>
@@ -1248,7 +1248,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis" class="md-nav__link">
-    src.pheval.analyse.disease_prioritisation_analysis
+    disease_prioritisation_analysis
   </a>
   
 </li>
@@ -1263,14 +1263,14 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.assess_disease_prioritisation" class="md-nav__link">
-    assess_disease_prioritisation()
+    assess_disease_prioritisation
   </a>
   
 </li>
@@ -1282,14 +1282,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_disease_prioritisation" class="md-nav__link">
-    assess_phenopacket_disease_prioritisation()
+    assess_phenopacket_disease_prioritisation
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_prioritisation" class="md-nav__link">
-    benchmark_disease_prioritisation()
+    benchmark_disease_prioritisation
   </a>
   
 </li>
@@ -1319,6 +1319,7 @@ <h1>Disease prioritisation analysis</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.disease_prioritisation_analysis"></a>
   <div class="doc doc-contents first">
 
@@ -1338,7 +1339,7 @@ <h1>Disease prioritisation analysis</h1>
 
 
 <h2 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation" class="doc doc-heading">
-        <code>AssessDiseasePrioritisation</code>
+          <code>AssessDiseasePrioritisation</code>
 
 
 </h2>
@@ -1349,10 +1350,9 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
   
       <p>Class for assessing disease prioritisation based on thresholds and scoring orders.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 17</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 17</span>
 <span class="normal"> 18</span>
 <span class="normal"> 19</span>
 <span class="normal"> 20</span>
@@ -1714,7 +1714,7 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
             <span class="bp">self</span><span class="o">.</span><span class="n">standardised_disease_results</span><span class="p">,</span> <span class="n">relevant_ranks</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1728,12 +1728,13 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">results_dir</span><span class="p">,</span> <span class="n">standardised_disease_results</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">proband_diseases</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">results_dir</span><span class="p">,</span> <span class="n">standardised_disease_results</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">proband_diseases</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1742,6 +1743,8 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
   
       <p>Initialise AssessDiseasePrioritisation class</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1758,7 +1761,11 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the phenopacket file</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the phenopacket file</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1768,7 +1775,11 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the results directory</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the results directory</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1778,7 +1789,11 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.RankedPhEvalDiseaseResult">RankedPhEvalDiseaseResult</span>]</code>
           </td>
-          <td><p>List of ranked PhEval disease results</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of ranked PhEval disease results</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1788,7 +1803,11 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
           <td>
                 <code>float</code>
           </td>
-          <td><p>Threshold for scores</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Threshold for scores</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1798,7 +1817,11 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
           <td>
                 <code>str</code>
           </td>
-          <td><p>Score order for results, either ascending or descending</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Score order for results, either ascending or descending</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1808,7 +1831,11 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.utils.phenopacket_utils.ProbandDisease">ProbandDisease</span>]</code>
           </td>
-          <td><p>List of proband diseases</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of proband diseases</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1816,9 +1843,9 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">20</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">20</span>
 <span class="normal">21</span>
 <span class="normal">22</span>
 <span class="normal">23</span>
@@ -1872,17 +1899,18 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
     <span class="bp">self</span><span class="o">.</span><span class="n">score_order</span> <span class="o">=</span> <span class="n">score_order</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">proband_diseases</span> <span class="o">=</span> <span class="n">proband_diseases</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.assess_disease_prioritisation" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">assess_disease_prioritisation</span><span class="p">(</span><span class="n">rank_stats</span><span class="p">,</span> <span class="n">rank_records</span><span class="p">,</span> <span class="n">binary_classification_stats</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">assess_disease_prioritisation</span><span class="p">(</span><span class="n">rank_stats</span><span class="p">,</span> <span class="n">rank_records</span><span class="p">,</span> <span class="n">binary_classification_stats</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1893,6 +1921,8 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
 <p>This method assesses the prioritisation of diseases based on the provided criteria
 and records ranks using a PrioritisationRankRecorder.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1909,7 +1939,11 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
           <td>
                 <code><span title="pheval.analyse.rank_stats.RankStats">RankStats</span></code>
           </td>
-          <td><p>RankStats class instance</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>RankStats class instance</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1919,7 +1953,11 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>A defaultdict to store the correct ranked results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A defaultdict to store the correct ranked results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1929,7 +1967,11 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
           <td>
                 <code><span title="pheval.analyse.binary_classification_stats.BinaryClassificationStats">BinaryClassificationStats</span></code>
           </td>
-          <td><p>BinaryClassificationStats class instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>BinaryClassificationStats class instance.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1937,9 +1979,9 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">149</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">149</span>
 <span class="normal">150</span>
 <span class="normal">151</span>
 <span class="normal">152</span>
@@ -2037,7 +2079,7 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
         <span class="bp">self</span><span class="o">.</span><span class="n">standardised_disease_results</span><span class="p">,</span> <span class="n">relevant_ranks</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2048,15 +2090,17 @@ <h3 id="src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioriti
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_disease_prioritisation" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">assess_phenopacket_disease_prioritisation</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">results_dir_and_input</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">disease_rank_stats</span><span class="p">,</span> <span class="n">disease_rank_comparison</span><span class="p">,</span> <span class="n">disease_binary_classification_stats</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">assess_phenopacket_disease_prioritisation</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">results_dir_and_input</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">disease_rank_stats</span><span class="p">,</span> <span class="n">disease_rank_comparison</span><span class="p">,</span> <span class="n">disease_binary_classification_stats</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2066,6 +2110,8 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
       <p>Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results
 against the recorded causative diseases for a proband in the Phenopacket.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2082,7 +2128,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the Phenopacket.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the Phenopacket.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2092,7 +2142,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
           <td>
                 <code>str</code>
           </td>
-          <td><p>The order in which scores are arranged, either ascending or descending.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The order in which scores are arranged, either ascending or descending.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2102,7 +2156,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
           <td>
                 <code><span title="pheval.analyse.run_data_parser.TrackInputOutputDirectories">TrackInputOutputDirectories</span></code>
           </td>
-          <td><p>Input and output directories.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Input and output directories.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2112,7 +2170,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
           <td>
                 <code>float</code>
           </td>
-          <td><p>Threshold for assessment.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Threshold for assessment.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2122,7 +2184,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
           <td>
                 <code><span title="pheval.analyse.rank_stats.RankStats">RankStats</span></code>
           </td>
-          <td><p>RankStats class instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>RankStats class instance.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2132,7 +2198,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>Default dictionary for disease rank comparisons.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Default dictionary for disease rank comparisons.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2142,7 +2212,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
           <td>
                 <code><span title="pheval.analyse.binary_classification_stats.BinaryClassificationStats">BinaryClassificationStats</span></code>
           </td>
-          <td><p>BinaryClassificationStats class instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>BinaryClassificationStats class instance.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2150,9 +2224,9 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">215</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">215</span>
 <span class="normal">216</span>
 <span class="normal">217</span>
 <span class="normal">218</span>
@@ -2226,17 +2300,18 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_di
         <span class="n">disease_rank_stats</span><span class="p">,</span> <span class="n">disease_rank_comparison</span><span class="p">,</span> <span class="n">disease_binary_classification_stats</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_prioritisation" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">benchmark_disease_prioritisation</span><span class="p">(</span><span class="n">results_directory_and_input</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">disease_rank_comparison</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">benchmark_disease_prioritisation</span><span class="p">(</span><span class="n">results_directory_and_input</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">disease_rank_comparison</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2245,6 +2320,8 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_pri
   
       <p>Benchmark a directory based on disease prioritisation results.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2261,7 +2338,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_pri
           <td>
                 <code><span title="pheval.analyse.run_data_parser.TrackInputOutputDirectories">TrackInputOutputDirectories</span></code>
           </td>
-          <td><p>Input and output directories.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Input and output directories.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2271,7 +2352,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_pri
           <td>
                 <code>str</code>
           </td>
-          <td><p>The order in which scores are arranged.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The order in which scores are arranged.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2281,7 +2366,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_pri
           <td>
                 <code>float</code>
           </td>
-          <td><p>Threshold for assessment.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Threshold for assessment.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2291,7 +2380,11 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_pri
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>Default dictionary for disease rank comparisons.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Default dictionary for disease rank comparisons.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2299,6 +2392,8 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_pri
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2311,19 +2406,27 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_pri
         <tr>
 <td><code>BenchmarkRunResults</code></td>          <td>
           </td>
-          <td><p>An object containing benchmarking results for disease prioritisation,</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>An object containing benchmarking results for disease prioritisation,</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
           </td>
-          <td><p>including ranks and rank statistics for the benchmarked directory.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>including ranks and rank statistics for the benchmarked directory.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">254</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/disease_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">254</span>
 <span class="normal">255</span>
 <span class="normal">256</span>
 <span class="normal">257</span>
@@ -2397,7 +2500,7 @@ <h2 id="src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_pri
         <span class="n">binary_classification_stats</span><span class="o">=</span><span class="n">disease_binary_classification_stats</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/analyse/gene_prioritisation_analysis/index.html b/api/pheval/analyse/gene_prioritisation_analysis/index.html
index e16554c8e..697341646 100644
--- a/api/pheval/analyse/gene_prioritisation_analysis/index.html
+++ b/api/pheval/analyse/gene_prioritisation_analysis/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/gene_prioritisation_analysis/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -561,7 +561,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis" class="md-nav__link">
-    src.pheval.analyse.gene_prioritisation_analysis
+    gene_prioritisation_analysis
   </a>
   
 </li>
@@ -576,14 +576,14 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.assess_gene_prioritisation" class="md-nav__link">
-    assess_gene_prioritisation()
+    assess_gene_prioritisation
   </a>
   
 </li>
@@ -595,14 +595,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_prioritisation" class="md-nav__link">
-    assess_phenopacket_gene_prioritisation()
+    assess_phenopacket_gene_prioritisation
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis.benchmark_gene_prioritisation" class="md-nav__link">
-    benchmark_gene_prioritisation()
+    benchmark_gene_prioritisation
   </a>
   
 </li>
@@ -1248,7 +1248,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis" class="md-nav__link">
-    src.pheval.analyse.gene_prioritisation_analysis
+    gene_prioritisation_analysis
   </a>
   
 </li>
@@ -1263,14 +1263,14 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.assess_gene_prioritisation" class="md-nav__link">
-    assess_gene_prioritisation()
+    assess_gene_prioritisation
   </a>
   
 </li>
@@ -1282,14 +1282,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_prioritisation" class="md-nav__link">
-    assess_phenopacket_gene_prioritisation()
+    assess_phenopacket_gene_prioritisation
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.gene_prioritisation_analysis.benchmark_gene_prioritisation" class="md-nav__link">
-    benchmark_gene_prioritisation()
+    benchmark_gene_prioritisation
   </a>
   
 </li>
@@ -1319,6 +1319,7 @@ <h1>Gene prioritisation analysis</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.gene_prioritisation_analysis"></a>
   <div class="doc doc-contents first">
 
@@ -1338,7 +1339,7 @@ <h1>Gene prioritisation analysis</h1>
 
 
 <h2 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation" class="doc doc-heading">
-        <code>AssessGenePrioritisation</code>
+          <code>AssessGenePrioritisation</code>
 
 
 </h2>
@@ -1349,10 +1350,9 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
   
       <p>Class for assessing gene prioritisation based on thresholds and scoring orders.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 19</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 19</span>
 <span class="normal"> 20</span>
 <span class="normal"> 21</span>
 <span class="normal"> 22</span>
@@ -1760,7 +1760,7 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
             <span class="n">pheval_results</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">standardised_gene_results</span><span class="p">,</span> <span class="n">relevant_ranks</span><span class="o">=</span><span class="n">relevant_ranks</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1774,12 +1774,13 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">results_dir</span><span class="p">,</span> <span class="n">standardised_gene_results</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">proband_causative_genes</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">results_dir</span><span class="p">,</span> <span class="n">standardised_gene_results</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">proband_causative_genes</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1788,6 +1789,8 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
   
       <p>Initialise AssessGenePrioritisation class.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1804,7 +1807,11 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the phenopacket file</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the phenopacket file</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1814,7 +1821,11 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the results directory</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the results directory</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1824,7 +1835,11 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.RankedPhEvalGeneResult">RankedPhEvalGeneResult</span>]</code>
           </td>
-          <td><p>List of ranked PhEval gene results</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of ranked PhEval gene results</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1834,7 +1849,11 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
           <td>
                 <code>float</code>
           </td>
-          <td><p>Threshold for scores</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Threshold for scores</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1844,7 +1863,11 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
           <td>
                 <code>str</code>
           </td>
-          <td><p>Score order for results, either ascending or descending</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Score order for results, either ascending or descending</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1854,7 +1877,11 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.utils.phenopacket_utils.ProbandCausativeGene">ProbandCausativeGene</span>]</code>
           </td>
-          <td><p>List of proband causative genes</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of proband causative genes</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1862,9 +1889,9 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">22</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">22</span>
 <span class="normal">23</span>
 <span class="normal">24</span>
 <span class="normal">25</span>
@@ -1916,17 +1943,18 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
     <span class="bp">self</span><span class="o">.</span><span class="n">score_order</span> <span class="o">=</span> <span class="n">score_order</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">proband_causative_genes</span> <span class="o">=</span> <span class="n">proband_causative_genes</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.assess_gene_prioritisation" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">assess_gene_prioritisation</span><span class="p">(</span><span class="n">rank_stats</span><span class="p">,</span> <span class="n">rank_records</span><span class="p">,</span> <span class="n">binary_classification_stats</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">assess_gene_prioritisation</span><span class="p">(</span><span class="n">rank_stats</span><span class="p">,</span> <span class="n">rank_records</span><span class="p">,</span> <span class="n">binary_classification_stats</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1937,6 +1965,8 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
 This method assesses the prioritisation of genes based on the provided criteria
 and records ranks using a PrioritisationRankRecorder.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1953,7 +1983,11 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
           <td>
                 <code><span title="pheval.analyse.rank_stats.RankStats">RankStats</span></code>
           </td>
-          <td><p>RankStats class instance</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>RankStats class instance</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1963,7 +1997,11 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>A defaultdict to store the correct ranked results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A defaultdict to store the correct ranked results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1973,7 +2011,11 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
           <td>
                 <code><span title="pheval.analyse.binary_classification_stats.BinaryClassificationStats">BinaryClassificationStats</span></code>
           </td>
-          <td><p>BinaryClassificationStats class instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>BinaryClassificationStats class instance.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1981,9 +2023,9 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">163</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">163</span>
 <span class="normal">164</span>
 <span class="normal">165</span>
 <span class="normal">166</span>
@@ -2103,7 +2145,7 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
         <span class="n">pheval_results</span><span class="o">=</span><span class="bp">self</span><span class="o">.</span><span class="n">standardised_gene_results</span><span class="p">,</span> <span class="n">relevant_ranks</span><span class="o">=</span><span class="n">relevant_ranks</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2114,15 +2156,17 @@ <h3 id="src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_prioritisation" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">assess_phenopacket_gene_prioritisation</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">results_dir_and_input</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">gene_rank_stats</span><span class="p">,</span> <span class="n">gene_rank_comparison</span><span class="p">,</span> <span class="n">gene_binary_classification_stats</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">assess_phenopacket_gene_prioritisation</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">results_dir_and_input</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">gene_rank_stats</span><span class="p">,</span> <span class="n">gene_rank_comparison</span><span class="p">,</span> <span class="n">gene_binary_classification_stats</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2132,6 +2176,8 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
       <p>Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results
 against the recorded causative genes for a proband in the Phenopacket.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2148,7 +2194,11 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the Phenopacket.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the Phenopacket.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2158,7 +2208,11 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
           <td>
                 <code>str</code>
           </td>
-          <td><p>The order in which scores are arranged, either ascending or descending.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The order in which scores are arranged, either ascending or descending.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2168,7 +2222,11 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
           <td>
                 <code><span title="pheval.analyse.run_data_parser.TrackInputOutputDirectories">TrackInputOutputDirectories</span></code>
           </td>
-          <td><p>Input and output directories.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Input and output directories.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2178,7 +2236,11 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
           <td>
                 <code>float</code>
           </td>
-          <td><p>Threshold for assessment.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Threshold for assessment.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2188,7 +2250,11 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
           <td>
                 <code><span title="pheval.analyse.rank_stats.RankStats">RankStats</span></code>
           </td>
-          <td><p>RankStats class instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>RankStats class instance.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2198,7 +2264,11 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>Default dictionary for gene rank comparisons.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Default dictionary for gene rank comparisons.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2208,7 +2278,11 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
           <td>
                 <code><span title="pheval.analyse.binary_classification_stats.BinaryClassificationStats">BinaryClassificationStats</span></code>
           </td>
-          <td><p>BinaryClassificationStats class instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>BinaryClassificationStats class instance.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2216,9 +2290,9 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">239</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">239</span>
 <span class="normal">240</span>
 <span class="normal">241</span>
 <span class="normal">242</span>
@@ -2292,17 +2366,18 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_
         <span class="n">gene_rank_stats</span><span class="p">,</span> <span class="n">gene_rank_comparison</span><span class="p">,</span> <span class="n">gene_binary_classification_stats</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.gene_prioritisation_analysis.benchmark_gene_prioritisation" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">benchmark_gene_prioritisation</span><span class="p">(</span><span class="n">results_directory_and_input</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">gene_rank_comparison</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">benchmark_gene_prioritisation</span><span class="p">(</span><span class="n">results_directory_and_input</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">gene_rank_comparison</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2319,9 +2394,9 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.benchmark_gene_prioritis
      BenchmarkRunResults: An object containing benchmarking results for gene prioritisation,
      including ranks and rank statistics for the benchmarked directory.</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">278</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/gene_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">278</span>
 <span class="normal">279</span>
 <span class="normal">280</span>
 <span class="normal">281</span>
@@ -2391,7 +2466,7 @@ <h2 id="src.pheval.analyse.gene_prioritisation_analysis.benchmark_gene_prioritis
         <span class="n">binary_classification_stats</span><span class="o">=</span><span class="n">gene_binary_classification_stats</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/analyse/generate_plots/index.html b/api/pheval/analyse/generate_plots/index.html
index 08bb76deb..4cba87853 100644
--- a/api/pheval/analyse/generate_plots/index.html
+++ b/api/pheval/analyse/generate_plots/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/generate_plots/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -575,7 +575,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots" class="md-nav__link">
-    src.pheval.analyse.generate_plots
+    generate_plots
   </a>
   
 </li>
@@ -590,49 +590,49 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar" class="md-nav__link">
-    generate_cumulative_bar()
+    generate_cumulative_bar
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_bar" class="md-nav__link">
-    generate_non_cumulative_bar()
+    generate_non_cumulative_bar
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recall" class="md-nav__link">
-    generate_precision_recall()
+    generate_precision_recall
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve" class="md-nav__link">
-    generate_roc_curve()
+    generate_roc_curve
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plot" class="md-nav__link">
-    generate_stacked_bar_plot()
+    generate_stacked_bar_plot
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name" class="md-nav__link">
-    return_benchmark_name()
+    return_benchmark_name
   </a>
   
 </li>
@@ -644,21 +644,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.generate_plots" class="md-nav__link">
-    generate_plots()
+    generate_plots
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_tsv" class="md-nav__link">
-    generate_plots_from_benchmark_summary_tsv()
+    generate_plots_from_benchmark_summary_tsv
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix" class="md-nav__link">
-    trim_corpus_results_directory_suffix()
+    trim_corpus_results_directory_suffix
   </a>
   
 </li>
@@ -1290,7 +1290,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots" class="md-nav__link">
-    src.pheval.analyse.generate_plots
+    generate_plots
   </a>
   
 </li>
@@ -1305,49 +1305,49 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar" class="md-nav__link">
-    generate_cumulative_bar()
+    generate_cumulative_bar
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_bar" class="md-nav__link">
-    generate_non_cumulative_bar()
+    generate_non_cumulative_bar
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recall" class="md-nav__link">
-    generate_precision_recall()
+    generate_precision_recall
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve" class="md-nav__link">
-    generate_roc_curve()
+    generate_roc_curve
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plot" class="md-nav__link">
-    generate_stacked_bar_plot()
+    generate_stacked_bar_plot
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name" class="md-nav__link">
-    return_benchmark_name()
+    return_benchmark_name
   </a>
   
 </li>
@@ -1359,21 +1359,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.generate_plots" class="md-nav__link">
-    generate_plots()
+    generate_plots
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_tsv" class="md-nav__link">
-    generate_plots_from_benchmark_summary_tsv()
+    generate_plots_from_benchmark_summary_tsv
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix" class="md-nav__link">
-    trim_corpus_results_directory_suffix()
+    trim_corpus_results_directory_suffix
   </a>
   
 </li>
@@ -1403,6 +1403,7 @@ <h1>Generate plots</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.generate_plots"></a>
   <div class="doc doc-contents first">
 
@@ -1422,7 +1423,7 @@ <h1>Generate plots</h1>
 
 
 <h2 id="src.pheval.analyse.generate_plots.PlotGenerator" class="doc doc-heading">
-        <code>PlotGenerator</code>
+          <code>PlotGenerator</code>
 
 
 </h2>
@@ -1433,10 +1434,9 @@ <h2 id="src.pheval.analyse.generate_plots.PlotGenerator" class="doc doc-heading"
   
       <p>Class to generate plots.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 37</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 37</span>
 <span class="normal"> 38</span>
 <span class="normal"> 39</span>
 <span class="normal"> 40</span>
@@ -2318,7 +2318,7 @@ <h2 id="src.pheval.analyse.generate_plots.PlotGenerator" class="doc doc-heading"
             <span class="n">bbox_inches</span><span class="o">=</span><span class="s2">&quot;tight&quot;</span><span class="p">,</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2332,30 +2332,29 @@ <h2 id="src.pheval.analyse.generate_plots.PlotGenerator" class="doc doc-heading"
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">()</span></code>
 
 </h3>
 
 
   <div class="doc doc-contents ">
   
-      <p>Initialise the PlotGenerator class.</p>
-
-<details class="note">
-  <summary>Note</summary>
-  <p><code>self.stats</code> will be used to store statistics data.
-<code>self.mrr</code> will store Mean Reciprocal Rank (MRR) values.
-Matplotlib settings are configured to remove the right and top axes spines
-for generated plots.</p>
-</details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">53</span>
+      <p>Initialise the PlotGenerator class.
+Note:
+    <code>self.stats</code> will be used to store statistics data.
+    <code>self.mrr</code> will store Mean Reciprocal Rank (MRR) values.
+    Matplotlib settings are configured to remove the right and top axes spines
+    for generated plots.</p>
+
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">53</span>
 <span class="normal">54</span>
 <span class="normal">55</span>
 <span class="normal">56</span>
@@ -2383,17 +2382,18 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.__init__" class="doc doc
     <span class="n">matplotlib</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s2">&quot;axes.spines.right&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span>
     <span class="n">matplotlib</span><span class="o">.</span><span class="n">rcParams</span><span class="p">[</span><span class="s2">&quot;axes.spines.top&quot;</span><span class="p">]</span> <span class="o">=</span> <span class="kc">False</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_cumulative_bar</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_cumulative_bar</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2402,6 +2402,8 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar"
   
       <p>Generate a cumulative bar plot.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2418,7 +2420,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar"
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span>]</code>
           </td>
-          <td><p>List of benchmarking results for multiple runs.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of benchmarking results for multiple runs.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2428,7 +2434,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar"
           <td>
                 <code><span title="pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</span></code>
           </td>
-          <td><p>Object containing benchmarking output generation details.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Object containing benchmarking output generation details.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2438,7 +2448,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar"
           <td>
                 <code>str</code>
           </td>
-          <td><p>Title for the generated plot. Defaults to None.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Title for the generated plot. Defaults to None.</p>
+            </div>
+          </td>
           <td>
                 <code>None</code>
           </td>
@@ -2446,9 +2460,9 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar"
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">253</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">253</span>
 <span class="normal">254</span>
 <span class="normal">255</span>
 <span class="normal">256</span>
@@ -2528,17 +2542,18 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar"
         <span class="n">bbox_inches</span><span class="o">=</span><span class="s2">&quot;tight&quot;</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_bar" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_non_cumulative_bar</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_non_cumulative_bar</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2547,6 +2562,8 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_
   
       <p>Generate a non-cumulative bar plot.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2563,7 +2580,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span>]</code>
           </td>
-          <td><p>List of benchmarking results for multiple runs.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of benchmarking results for multiple runs.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2573,7 +2594,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_
           <td>
                 <code><span title="pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</span></code>
           </td>
-          <td><p>Object containing benchmarking output generation details.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Object containing benchmarking output generation details.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2583,7 +2608,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_
           <td>
                 <code>str</code>
           </td>
-          <td><p>Title for the generated plot. Defaults to None.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Title for the generated plot. Defaults to None.</p>
+            </div>
+          </td>
           <td>
                 <code>None</code>
           </td>
@@ -2591,9 +2620,9 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">437</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">437</span>
 <span class="normal">438</span>
 <span class="normal">439</span>
 <span class="normal">440</span>
@@ -2675,17 +2704,18 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_
         <span class="n">bbox_inches</span><span class="o">=</span><span class="s2">&quot;tight&quot;</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recall" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_precision_recall</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_precision_recall</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2694,6 +2724,8 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recal
   
       <p>Generate and plot Precision-Recall curves for binary classification benchmark results.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2710,7 +2742,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recal
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span>]</code>
           </td>
-          <td><p>List of benchmarking results for multiple runs.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of benchmarking results for multiple runs.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2720,7 +2756,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recal
           <td>
                 <code><span title="pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</span></code>
           </td>
-          <td><p>Object containing benchmarking output generation details.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Object containing benchmarking output generation details.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2728,9 +2768,9 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recal
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">399</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">399</span>
 <span class="normal">400</span>
 <span class="normal">401</span>
 <span class="normal">402</span>
@@ -2804,17 +2844,18 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recal
         <span class="n">bbox_inches</span><span class="o">=</span><span class="s2">&quot;tight&quot;</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_roc_curve</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_roc_curve</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2823,6 +2864,8 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve" clas
   
       <p>Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2839,7 +2882,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve" clas
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span>]</code>
           </td>
-          <td><p>List of benchmarking results for multiple runs.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of benchmarking results for multiple runs.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2849,7 +2896,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve" clas
           <td>
                 <code><span title="pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</span></code>
           </td>
-          <td><p>Object containing benchmarking output generation details.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Object containing benchmarking output generation details.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2857,9 +2908,9 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve" clas
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">361</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">361</span>
 <span class="normal">362</span>
 <span class="normal">363</span>
 <span class="normal">364</span>
@@ -2933,17 +2984,18 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve" clas
         <span class="n">bbox_inches</span><span class="o">=</span><span class="s2">&quot;tight&quot;</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plot" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_stacked_bar_plot</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_stacked_bar_plot</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2952,6 +3004,8 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plo
   
       <p>Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2968,7 +3022,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plo
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span>]</code>
           </td>
-          <td><p>List of benchmarking results for multiple runs.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of benchmarking results for multiple runs.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2978,7 +3036,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plo
           <td>
                 <code><span title="pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</span></code>
           </td>
-          <td><p>Object containing benchmarking output generation details.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Object containing benchmarking output generation details.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2988,7 +3050,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plo
           <td>
                 <code>str</code>
           </td>
-          <td><p>Title for the generated plot. Defaults to None.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Title for the generated plot. Defaults to None.</p>
+            </div>
+          </td>
           <td>
                 <code>None</code>
           </td>
@@ -2996,9 +3062,9 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plo
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">144</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">144</span>
 <span class="normal">145</span>
 <span class="normal">146</span>
 <span class="normal">147</span>
@@ -3108,17 +3174,18 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plo
         <span class="n">bbox_inches</span><span class="o">=</span><span class="s2">&quot;tight&quot;</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">return_benchmark_name</span><span class="p">(</span><span class="n">benchmark_result</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">return_benchmark_name</span><span class="p">(</span><span class="n">benchmark_result</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3127,6 +3194,8 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name" c
   
       <p>Return the benchmark name for a run.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3143,7 +3212,11 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name" c
           <td>
                 <code><span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span></code>
           </td>
-          <td><p>The benchmarking results for a run.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The benchmarking results for a run.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3151,6 +3224,8 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name" c
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3164,14 +3239,18 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name" c
 <td><code>str</code></td>          <td>
                 <code>str</code>
           </td>
-          <td><p>The benchmark name obtained from the given BenchmarkRunResults instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The benchmark name obtained from the given BenchmarkRunResults instance.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">81</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">81</span>
 <span class="normal">82</span>
 <span class="normal">83</span>
 <span class="normal">84</span>
@@ -3201,7 +3280,7 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name" c
         <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">_create_run_identifier</span><span class="p">(</span><span class="n">benchmark_result</span><span class="o">.</span><span class="n">results_dir</span><span class="p">)</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -3212,15 +3291,17 @@ <h3 id="src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name" c
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.generate_plots.generate_plots" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_plots</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">generate_from_tsv</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_plots</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">,</span> <span class="n">title</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">generate_from_tsv</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span></code>
 
 </h2>
 
@@ -3230,6 +3311,8 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots" class="doc doc-heading
       <p>Generate summary statistics bar plots for prioritisation.</p>
 <p>This method generates summary statistics bar plots based on the provided benchmarking results and plot type.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3246,7 +3329,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots" class="doc doc-heading
           <td>
                 <code>list[<span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span>]</code>
           </td>
-          <td><p>List of benchmarking results for multiple runs.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of benchmarking results for multiple runs.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3256,7 +3343,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots" class="doc doc-heading
           <td>
                 <code><span title="pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</span></code>
           </td>
-          <td><p>Object containing benchmarking output generation details.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Object containing benchmarking output generation details.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3266,7 +3357,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots" class="doc doc-heading
           <td>
                 <code>str</code>
           </td>
-          <td><p>Type of plot to be generated ("bar_stacked", "bar_cumulative", "bar_non_cumulative").</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Type of plot to be generated ("bar_stacked", "bar_cumulative", "bar_non_cumulative").</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3276,7 +3371,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots" class="doc doc-heading
           <td>
                 <code>str</code>
           </td>
-          <td><p>Title for the generated plot. Defaults to None.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Title for the generated plot. Defaults to None.</p>
+            </div>
+          </td>
           <td>
                 <code>None</code>
           </td>
@@ -3286,7 +3385,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots" class="doc doc-heading
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Specify whether to generate plots from the TSV file. Defaults to False.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Specify whether to generate plots from the TSV file. Defaults to False.</p>
+            </div>
+          </td>
           <td>
                 <code>False</code>
           </td>
@@ -3294,9 +3397,9 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots" class="doc doc-heading
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">480</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">480</span>
 <span class="normal">481</span>
 <span class="normal">482</span>
 <span class="normal">483</span>
@@ -3354,17 +3457,18 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots" class="doc doc-heading
     <span class="k">elif</span> <span class="n">plot_type</span> <span class="o">==</span> <span class="s2">&quot;bar_non_cumulative&quot;</span><span class="p">:</span>
         <span class="n">plot_generator</span><span class="o">.</span><span class="n">generate_non_cumulative_bar</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">title</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_tsv" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_plots_from_benchmark_summary_tsv</span><span class="p">(</span><span class="n">benchmark_summary_tsv</span><span class="p">,</span> <span class="n">gene_analysis</span><span class="p">,</span> <span class="n">variant_analysis</span><span class="p">,</span> <span class="n">disease_analysis</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">,</span> <span class="n">title</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_plots_from_benchmark_summary_tsv</span><span class="p">(</span><span class="n">benchmark_summary_tsv</span><span class="p">,</span> <span class="n">gene_analysis</span><span class="p">,</span> <span class="n">variant_analysis</span><span class="p">,</span> <span class="n">disease_analysis</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">,</span> <span class="n">title</span><span class="p">)</span></code>
 
 </h2>
 
@@ -3375,6 +3479,8 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_
 <p>Reads a summary of benchmark results from a TSV file and generates a bar plot
 based on the analysis type and plot type.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3391,7 +3497,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the summary TSV file containing benchmark results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the summary TSV file containing benchmark results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3401,7 +3511,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Flag indicating whether to analyse gene prioritisation.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Flag indicating whether to analyse gene prioritisation.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3411,7 +3525,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Flag indicating whether to analyse variant prioritisation.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Flag indicating whether to analyse variant prioritisation.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3421,7 +3539,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Flag indicating whether to analyse disease prioritisation.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Flag indicating whether to analyse disease prioritisation.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3431,7 +3553,11 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_
           <td>
                 <code>str</code>
           </td>
-          <td><p>Type of plot to be generated ("bar_stacked", "bar_cumulative", "bar_non_cumulative").</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Type of plot to be generated ("bar_stacked", "bar_cumulative", "bar_non_cumulative").</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3441,35 +3567,23 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_
           <td>
                 <code>str</code>
           </td>
-          <td><p>Title for the generated plot.</p></td>
           <td>
-              <em>required</em>
+            <div class="doc-md-description">
+              <p>Title for the generated plot.</p>
+            </div>
           </td>
-        </tr>
-    </tbody>
-  </table>
-
-  <p><strong>Raises:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Type</th>
-        <th>Description</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
           <td>
-                <code>ValueError</code>
+              <em>required</em>
           </td>
-          <td><p>If an unsupported plot type is specified.</p></td>
         </tr>
     </tbody>
   </table>
+      <p><strong>Raises:</strong></p>
+      <p><code>ValueError</code>: If an unsupported plot type is specified.</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">511</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">511</span>
 <span class="normal">512</span>
 <span class="normal">513</span>
 <span class="normal">514</span>
@@ -3543,17 +3657,18 @@ <h2 id="src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_
         <span class="p">)</span>
     <span class="n">generate_plots</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">,</span> <span class="n">title</span><span class="p">,</span> <span class="kc">True</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">trim_corpus_results_directory_suffix</span><span class="p">(</span><span class="n">corpus_results_directory</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">trim_corpus_results_directory_suffix</span><span class="p">(</span><span class="n">corpus_results_directory</span><span class="p">)</span></code>
 
 </h2>
 
@@ -3562,6 +3677,8 @@ <h2 id="src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix"
   
       <p>Trim the suffix from the corpus results directory name.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3578,7 +3695,11 @@ <h2 id="src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix"
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory path containing corpus results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory path containing corpus results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3586,6 +3707,8 @@ <h2 id="src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix"
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3599,14 +3722,18 @@ <h2 id="src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix"
 <td><code>Path</code></td>          <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The Path object with the suffix removed from the directory name.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The Path object with the suffix removed from the directory name.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">24</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_plots.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">24</span>
 <span class="normal">25</span>
 <span class="normal">26</span>
 <span class="normal">27</span>
@@ -3628,7 +3755,7 @@ <h2 id="src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix"
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="n">Path</span><span class="p">(</span><span class="nb">str</span><span class="p">(</span><span class="n">corpus_results_directory</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="n">PHEVAL_RESULTS_DIRECTORY_SUFFIX</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">))</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/analyse/generate_summary_outputs/index.html b/api/pheval/analyse/generate_summary_outputs/index.html
index 6fa093dfe..2fda74b44 100644
--- a/api/pheval/analyse/generate_summary_outputs/index.html
+++ b/api/pheval/analyse/generate_summary_outputs/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/generate_summary_outputs/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -589,7 +589,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs" class="md-nav__link">
-    src.pheval.analyse.generate_summary_outputs
+    generate_summary_outputs
   </a>
   
 </li>
@@ -604,21 +604,21 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_comparison_output" class="md-nav__link">
-    generate_comparison_output()
+    generate_comparison_output
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_output" class="md-nav__link">
-    generate_output()
+    generate_output
   </a>
   
 </li>
@@ -630,21 +630,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.generate_benchmark_comparison_output" class="md-nav__link">
-    generate_benchmark_comparison_output()
+    generate_benchmark_comparison_output
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.generate_benchmark_output" class="md-nav__link">
-    generate_benchmark_output()
+    generate_benchmark_output
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.merge_results" class="md-nav__link">
-    merge_results()
+    merge_results
   </a>
   
 </li>
@@ -1262,7 +1262,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs" class="md-nav__link">
-    src.pheval.analyse.generate_summary_outputs
+    generate_summary_outputs
   </a>
   
 </li>
@@ -1277,21 +1277,21 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_comparison_output" class="md-nav__link">
-    generate_comparison_output()
+    generate_comparison_output
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_output" class="md-nav__link">
-    generate_output()
+    generate_output
   </a>
   
 </li>
@@ -1303,21 +1303,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.generate_benchmark_comparison_output" class="md-nav__link">
-    generate_benchmark_comparison_output()
+    generate_benchmark_comparison_output
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.generate_benchmark_output" class="md-nav__link">
-    generate_benchmark_output()
+    generate_benchmark_output
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.generate_summary_outputs.merge_results" class="md-nav__link">
-    merge_results()
+    merge_results
   </a>
   
 </li>
@@ -1347,6 +1347,7 @@ <h1>Generate summary outputs</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.generate_summary_outputs"></a>
   <div class="doc doc-contents first">
 
@@ -1366,7 +1367,7 @@ <h1>Generate summary outputs</h1>
 
 
 <h2 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator" class="doc doc-heading">
-        <code>RankComparisonGenerator</code>
+          <code>RankComparisonGenerator</code>
 
 
 </h2>
@@ -1377,10 +1378,9 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator" cla
   
       <p>Class for writing the run comparison of rank assignment for prioritisation.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">15</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">15</span>
 <span class="normal">16</span>
 <span class="normal">17</span>
 <span class="normal">18</span>
@@ -1506,7 +1506,7 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator" cla
 <span class="sd">        &quot;&quot;&quot;</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">_calculate_rank_difference</span><span class="p">()</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">prefix</span> <span class="o">+</span> <span class="n">suffix</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s2">&quot;</span><span class="se">\t</span><span class="s2">&quot;</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1520,12 +1520,13 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator" cla
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">run_comparison</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">run_comparison</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1534,6 +1535,8 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.__in
   
       <p>Initialise the RankComparisonGenerator class.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1550,7 +1553,11 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.__in
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>A nested dictionary containing the run comparison data.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A nested dictionary containing the run comparison data.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1558,9 +1565,9 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.__in
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">18</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">18</span>
 <span class="normal">19</span>
 <span class="normal">20</span>
 <span class="normal">21</span>
@@ -1576,17 +1583,18 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.__in
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">run_comparison</span> <span class="o">=</span> <span class="n">run_comparison</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_comparison_output" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_comparison_output</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="n">suffix</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_comparison_output</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="n">suffix</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1595,6 +1603,8 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
   
       <p>Generate output file with calculated rank differences.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1611,7 +1621,11 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
           <td>
                 <code>str</code>
           </td>
-          <td><p>Prefix for the output file name.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Prefix for the output file name.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1621,7 +1635,11 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
           <td>
                 <code>str</code>
           </td>
-          <td><p>Suffix for the output file name.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Suffix for the output file name.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1629,9 +1647,9 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">69</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">69</span>
 <span class="normal">70</span>
 <span class="normal">71</span>
 <span class="normal">72</span>
@@ -1649,17 +1667,18 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">_calculate_rank_difference</span><span class="p">()</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">prefix</span> <span class="o">+</span> <span class="n">suffix</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s2">&quot;</span><span class="se">\t</span><span class="s2">&quot;</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_output" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_output</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="n">suffix</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_output</span><span class="p">(</span><span class="n">prefix</span><span class="p">,</span> <span class="n">suffix</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1668,6 +1687,8 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
   
       <p>Generate output file from the run comparison data.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1684,7 +1705,11 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
           <td>
                 <code>str</code>
           </td>
-          <td><p>Prefix for the output file name.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Prefix for the output file name.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1694,7 +1719,11 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
           <td>
                 <code>str</code>
           </td>
-          <td><p>Suffix for the output file name.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Suffix for the output file name.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1702,9 +1731,9 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">59</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">59</span>
 <span class="normal">60</span>
 <span class="normal">61</span>
 <span class="normal">62</span>
@@ -1722,7 +1751,7 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">_generate_dataframe</span><span class="p">()</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">prefix</span> <span class="o">+</span> <span class="n">suffix</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s2">&quot;</span><span class="se">\t</span><span class="s2">&quot;</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -1733,15 +1762,17 @@ <h3 id="src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.gene
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_comparison_output" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_benchmark_comparison_output</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_benchmark_comparison_output</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1753,6 +1784,8 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_compariso
 between pairs of <code>BenchmarkRunResults</code> instances in <code>benchmarking_results</code> and generates rank
 comparison outputs using <code>RankComparisonGenerator</code> for each pair.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1769,8 +1802,12 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_compariso
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span>]</code>
           </td>
-          <td><p>A list containing BenchmarkRunResults instances
-representing the benchmarking results of multiple runs.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A list containing BenchmarkRunResults instances
+representing the benchmarking results of multiple runs.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1780,7 +1817,11 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_compariso
           <td>
                 <code>str</code>
           </td>
-          <td><p>The type of plot to be generated.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The type of plot to be generated.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1790,7 +1831,11 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_compariso
           <td>
                 <code><span title="pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</span></code>
           </td>
-          <td><p>Object containing benchmarking output generation details.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Object containing benchmarking output generation details.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1798,9 +1843,9 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_compariso
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">137</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">137</span>
 <span class="normal">138</span>
 <span class="normal">139</span>
 <span class="normal">140</span>
@@ -1878,17 +1923,18 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_compariso
         <span class="n">plot_type</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_output" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_benchmark_output</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_benchmark_output</span><span class="p">(</span><span class="n">benchmarking_results</span><span class="p">,</span> <span class="n">plot_type</span><span class="p">,</span> <span class="n">benchmark_generator</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1897,6 +1943,8 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_output" c
   
       <p>Generate prioritisation outputs for a single benchmarking run.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1913,7 +1961,11 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_output" c
           <td>
                 <code><span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span></code>
           </td>
-          <td><p>Results of a benchmarking run.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Results of a benchmarking run.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1923,7 +1975,11 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_output" c
           <td>
                 <code>str</code>
           </td>
-          <td><p>Type of plot to generate.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Type of plot to generate.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1933,7 +1989,11 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_output" c
           <td>
                 <code><span title="pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator">BenchmarkRunOutputGenerator</span></code>
           </td>
-          <td><p>Object containing benchmarking output generation details.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Object containing benchmarking output generation details.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1941,9 +2001,9 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_output" c
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 80</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 80</span>
 <span class="normal"> 81</span>
 <span class="normal"> 82</span>
 <span class="normal"> 83</span>
@@ -1991,17 +2051,18 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.generate_benchmark_output" c
         <span class="n">plot_type</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.generate_summary_outputs.merge_results" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">merge_results</span><span class="p">(</span><span class="n">result1</span><span class="p">,</span> <span class="n">result2</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">merge_results</span><span class="p">(</span><span class="n">result1</span><span class="p">,</span> <span class="n">result2</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2015,6 +2076,8 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.merge_results" class="doc do
 will further merge their nested contents. If a key exists in <code>result2</code> but not in <code>result1</code>,
 it will be added to <code>result1</code>.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2031,7 +2094,11 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.merge_results" class="doc do
           <td>
                 <code>dict</code>
           </td>
-          <td><p>The first dictionary to be merged.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The first dictionary to be merged.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2041,7 +2108,11 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.merge_results" class="doc do
           <td>
                 <code>dict</code>
           </td>
-          <td><p>The second dictionary to be merged.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The second dictionary to be merged.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2049,6 +2120,8 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.merge_results" class="doc do
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2062,14 +2135,18 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.merge_results" class="doc do
 <td><code>defaultdict</code></td>          <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>The merged dictionary containing the combined contents of <code>result1</code> and <code>result2</code>.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The merged dictionary containing the combined contents of <code>result1</code> and <code>result2</code>.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">106</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/generate_summary_outputs.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">106</span>
 <span class="normal">107</span>
 <span class="normal">108</span>
 <span class="normal">109</span>
@@ -2127,7 +2204,7 @@ <h2 id="src.pheval.analyse.generate_summary_outputs.merge_results" class="doc do
             <span class="n">result1</span><span class="p">[</span><span class="n">key</span><span class="p">]</span> <span class="o">=</span> <span class="n">val</span>
     <span class="k">return</span> <span class="n">result1</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/analyse/parse_benchmark_summary/index.html b/api/pheval/analyse/parse_benchmark_summary/index.html
index 65e89a642..0a5e9a46d 100644
--- a/api/pheval/analyse/parse_benchmark_summary/index.html
+++ b/api/pheval/analyse/parse_benchmark_summary/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/parse_benchmark_summary/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -603,21 +603,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_benchmark_summary" class="md-nav__link">
-    src.pheval.analyse.parse_benchmark_summary
+    parse_benchmark_summary
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summary" class="md-nav__link">
-    parse_benchmark_result_summary()
+    parse_benchmark_result_summary
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_summary" class="md-nav__link">
-    read_benchmark_tsv_result_summary()
+    read_benchmark_tsv_result_summary
   </a>
   
 </li>
@@ -1221,21 +1221,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_benchmark_summary" class="md-nav__link">
-    src.pheval.analyse.parse_benchmark_summary
+    parse_benchmark_summary
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summary" class="md-nav__link">
-    parse_benchmark_result_summary()
+    parse_benchmark_result_summary
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_summary" class="md-nav__link">
-    read_benchmark_tsv_result_summary()
+    read_benchmark_tsv_result_summary
   </a>
   
 </li>
@@ -1265,6 +1265,7 @@ <h1>Parse benchmark summary</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.parse_benchmark_summary"></a>
   <div class="doc doc-contents first">
 
@@ -1280,12 +1281,13 @@ <h1>Parse benchmark summary</h1>
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summary" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">parse_benchmark_result_summary</span><span class="p">(</span><span class="n">benchmarking_df</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">parse_benchmark_result_summary</span><span class="p">(</span><span class="n">benchmarking_df</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1294,6 +1296,8 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summar
   
       <p>Parse the summary benchmark DataFrame into a list of BenchmarkRunResults.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1308,11 +1312,15 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summar
         <tr>
           <td><code>benchmarking_df</code></td>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
           </td>
-          <td><p>Summary benchmark DataFrame containing columns such as
+          <td>
+            <div class="doc-md-description">
+              <p>Summary benchmark DataFrame containing columns such as
                             'results_directory_path', 'top', 'top3', 'top5', 'top10',
-                            'found', 'total', 'mean_reciprocal_rank'.</p></td>
+                            'found', 'total', 'mean_reciprocal_rank'.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1320,6 +1328,8 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summar
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1333,14 +1343,18 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summar
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.analyse.benchmarking_data.BenchmarkRunResults">BenchmarkRunResults</span>]</code>
           </td>
-          <td><p>List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/parse_benchmark_summary.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">39</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/parse_benchmark_summary.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">39</span>
 <span class="normal">40</span>
 <span class="normal">41</span>
 <span class="normal">42</span>
@@ -1400,17 +1414,18 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summar
         <span class="n">benchmarking_results</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">benchmarking_result</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">benchmarking_results</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_summary" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">read_benchmark_tsv_result_summary</span><span class="p">(</span><span class="n">benchmarking_tsv</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">read_benchmark_tsv_result_summary</span><span class="p">(</span><span class="n">benchmarking_tsv</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1419,6 +1434,8 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_sum
   
       <p>Read the summary benchmark TSV output generated from the benchmark-comparison command.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1435,7 +1452,11 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_sum
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the summary benchmark TSV output file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the summary benchmark TSV output file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1443,6 +1464,8 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_sum
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1454,18 +1477,22 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_sum
     <tbody>
         <tr>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
           </td>
-          <td><p>pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including:
+          <td>
+            <div class="doc-md-description">
+              <p>pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including:
           'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found',
-          'total', 'mean_reciprocal_rank'.</p></td>
+          'total', 'mean_reciprocal_rank'.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/parse_benchmark_summary.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">11</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/parse_benchmark_summary.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">11</span>
 <span class="normal">12</span>
 <span class="normal">13</span>
 <span class="normal">14</span>
@@ -1517,7 +1544,7 @@ <h2 id="src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_sum
         <span class="p">],</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/analyse/parse_pheval_result/index.html b/api/pheval/analyse/parse_pheval_result/index.html
index 1dd1d5940..393c2ed33 100644
--- a/api/pheval/analyse/parse_pheval_result/index.html
+++ b/api/pheval/analyse/parse_pheval_result/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/parse_pheval_result/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -617,21 +617,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_pheval_result" class="md-nav__link">
-    src.pheval.analyse.parse_pheval_result
+    parse_pheval_result
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_pheval_result.parse_pheval_result" class="md-nav__link">
-    parse_pheval_result()
+    parse_pheval_result
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_pheval_result.read_standardised_result" class="md-nav__link">
-    read_standardised_result()
+    read_standardised_result
   </a>
   
 </li>
@@ -1221,21 +1221,21 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_pheval_result" class="md-nav__link">
-    src.pheval.analyse.parse_pheval_result
+    parse_pheval_result
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_pheval_result.parse_pheval_result" class="md-nav__link">
-    parse_pheval_result()
+    parse_pheval_result
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.parse_pheval_result.read_standardised_result" class="md-nav__link">
-    read_standardised_result()
+    read_standardised_result
   </a>
   
 </li>
@@ -1265,6 +1265,7 @@ <h1>Parse pheval result</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.parse_pheval_result"></a>
   <div class="doc doc-contents first">
 
@@ -1280,12 +1281,13 @@ <h1>Parse pheval result</h1>
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.parse_pheval_result.parse_pheval_result" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">parse_pheval_result</span><span class="p">(</span><span class="n">data_class_type</span><span class="p">,</span> <span class="n">pheval_result</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">parse_pheval_result</span><span class="p">(</span><span class="n">data_class_type</span><span class="p">,</span> <span class="n">pheval_result</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1294,6 +1296,8 @@ <h2 id="src.pheval.analyse.parse_pheval_result.parse_pheval_result" class="doc d
   
       <p>Parse PhEval result into specified dataclass type.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1310,7 +1314,11 @@ <h2 id="src.pheval.analyse.parse_pheval_result.parse_pheval_result" class="doc d
           <td>
                 <code><span title="pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</span></code>
           </td>
-          <td><p>The data class type to parse the result into.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The data class type to parse the result into.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1320,7 +1328,11 @@ <h2 id="src.pheval.analyse.parse_pheval_result.parse_pheval_result" class="doc d
           <td>
                 <code><span title="typing.List">List</span>[dict]</code>
           </td>
-          <td><p>A list of dictionaries representing the PhEval result.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A list of dictionaries representing the PhEval result.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1328,6 +1340,8 @@ <h2 id="src.pheval.analyse.parse_pheval_result.parse_pheval_result" class="doc d
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1341,20 +1355,28 @@ <h2 id="src.pheval.analyse.parse_pheval_result.parse_pheval_result" class="doc d
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</span>]</code>
           </td>
-          <td><p>List[PhEvalResult]: A list of instances of the specified data class type,</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[PhEvalResult]: A list of instances of the specified data class type,</p>
+            </div>
+          </td>
         </tr>
         <tr>
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</span>]</code>
           </td>
-          <td><p>each instance representing a row in the PhEval result.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>each instance representing a row in the PhEval result.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/parse_pheval_result.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">29</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/parse_pheval_result.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">29</span>
 <span class="normal">30</span>
 <span class="normal">31</span>
 <span class="normal">32</span>
@@ -1384,17 +1406,18 @@ <h2 id="src.pheval.analyse.parse_pheval_result.parse_pheval_result" class="doc d
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="p">[</span><span class="n">data_class_type</span><span class="p">(</span><span class="o">**</span><span class="n">row</span><span class="p">)</span> <span class="k">for</span> <span class="n">row</span> <span class="ow">in</span> <span class="n">pheval_result</span><span class="p">]</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.parse_pheval_result.read_standardised_result" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">read_standardised_result</span><span class="p">(</span><span class="n">standardised_result_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">read_standardised_result</span><span class="p">(</span><span class="n">standardised_result_path</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1403,6 +1426,8 @@ <h2 id="src.pheval.analyse.parse_pheval_result.read_standardised_result" class="
   
       <p>Read the standardised result output and return a list of dictionaries.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1419,7 +1444,11 @@ <h2 id="src.pheval.analyse.parse_pheval_result.read_standardised_result" class="
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the file containing the standardised result output.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the file containing the standardised result output.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1427,6 +1456,8 @@ <h2 id="src.pheval.analyse.parse_pheval_result.read_standardised_result" class="
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1440,14 +1471,18 @@ <h2 id="src.pheval.analyse.parse_pheval_result.read_standardised_result" class="
           <td>
                 <code><span title="typing.List">List</span>[dict]</code>
           </td>
-          <td><p>List[dict]: A list of dictionaries representing the content of the standardised result file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[dict]: A list of dictionaries representing the content of the standardised result file.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/parse_pheval_result.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">12</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/parse_pheval_result.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">12</span>
 <span class="normal">13</span>
 <span class="normal">14</span>
 <span class="normal">15</span>
@@ -1477,7 +1512,7 @@ <h2 id="src.pheval.analyse.parse_pheval_result.read_standardised_result" class="
         <span class="n">info_log</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Could not find </span><span class="si">{</span><span class="n">standardised_result_path</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
         <span class="k">return</span> <span class="n">pd</span><span class="o">.</span><span class="n">DataFrame</span><span class="p">()</span><span class="o">.</span><span class="n">to_dict</span><span class="p">(</span><span class="s2">&quot;records&quot;</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/analyse/prioritisation_rank_recorder/index.html b/api/pheval/analyse/prioritisation_rank_recorder/index.html
index b222f91ae..2e571b0d5 100644
--- a/api/pheval/analyse/prioritisation_rank_recorder/index.html
+++ b/api/pheval/analyse/prioritisation_rank_recorder/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/prioritisation_rank_recorder/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -631,7 +631,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.prioritisation_rank_recorder" class="md-nav__link">
-    src.pheval.analyse.prioritisation_rank_recorder
+    prioritisation_rank_recorder
   </a>
   
 </li>
@@ -646,7 +646,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder.record_rank" class="md-nav__link">
-    record_rank()
+    record_rank
   </a>
   
 </li>
@@ -1227,7 +1227,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.prioritisation_rank_recorder" class="md-nav__link">
-    src.pheval.analyse.prioritisation_rank_recorder
+    prioritisation_rank_recorder
   </a>
   
 </li>
@@ -1242,7 +1242,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder.record_rank" class="md-nav__link">
-    record_rank()
+    record_rank
   </a>
   
 </li>
@@ -1277,6 +1277,7 @@ <h1>Prioritisation rank recorder</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.prioritisation_rank_recorder"></a>
   <div class="doc doc-contents first">
 
@@ -1296,7 +1297,7 @@ <h1>Prioritisation rank recorder</h1>
 
 
 <h2 id="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder" class="doc doc-heading">
-        <code>PrioritisationRankRecorder</code>
+          <code>PrioritisationRankRecorder</code>
 
   
   <span class="doc doc-labels">
@@ -1311,6 +1312,8 @@ <h2 id="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecord
   
       <p>Record ranks for different types of prioritisation results.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1322,33 +1325,44 @@ <h2 id="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecord
     </thead>
     <tbody>
         <tr>
-          <td><code>index</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder.index">index</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>The index representing the run.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The index representing the run.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>directory</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder.directory">directory</span></code></td>
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The result directory path.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The result directory path.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>run_comparison</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder.run_comparison">run_comparison</span></code></td>
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>The comparison dictionary to record ranks.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The comparison dictionary to record ranks.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/prioritisation_rank_recorder.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">13</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/prioritisation_rank_recorder.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">13</span>
 <span class="normal">14</span>
 <span class="normal">15</span>
 <span class="normal">16</span>
@@ -1490,7 +1504,7 @@ <h2 id="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecord
             <span class="bp">self</span><span class="o">.</span><span class="n">_record_disease_rank</span><span class="p">()</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">run_comparison</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">][</span><span class="bp">self</span><span class="o">.</span><span class="n">directory</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">prioritisation_result</span><span class="o">.</span><span class="n">rank</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1504,12 +1518,13 @@ <h2 id="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecord
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder.record_rank" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">record_rank</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">record_rank</span><span class="p">()</span></code>
 
 </h3>
 
@@ -1522,9 +1537,9 @@ <h3 id="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecord
 dictionary for each respective run, allowing comparison and analysis of the ranks of correct results
 across different runs.</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/prioritisation_rank_recorder.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">65</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/prioritisation_rank_recorder.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">65</span>
 <span class="normal">66</span>
 <span class="normal">67</span>
 <span class="normal">68</span>
@@ -1562,7 +1577,7 @@ <h3 id="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecord
         <span class="bp">self</span><span class="o">.</span><span class="n">_record_disease_rank</span><span class="p">()</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">run_comparison</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">index</span><span class="p">][</span><span class="bp">self</span><span class="o">.</span><span class="n">directory</span><span class="p">]</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">prioritisation_result</span><span class="o">.</span><span class="n">rank</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -1573,6 +1588,7 @@ <h3 id="src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecord
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/analyse/prioritisation_result_types/index.html b/api/pheval/analyse/prioritisation_result_types/index.html
index 012ebd7c4..974dca726 100644
--- a/api/pheval/analyse/prioritisation_result_types/index.html
+++ b/api/pheval/analyse/prioritisation_result_types/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/prioritisation_result_types/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -645,7 +645,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.prioritisation_result_types" class="md-nav__link">
-    src.pheval.analyse.prioritisation_result_types
+    prioritisation_result_types
   </a>
   
 </li>
@@ -1228,7 +1228,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.prioritisation_result_types" class="md-nav__link">
-    src.pheval.analyse.prioritisation_result_types
+    prioritisation_result_types
   </a>
   
 </li>
@@ -1279,6 +1279,7 @@ <h1>Prioritisation result types</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.prioritisation_result_types"></a>
   <div class="doc doc-contents first">
 
@@ -1298,7 +1299,7 @@ <h1>Prioritisation result types</h1>
 
 
 <h2 id="src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResult" class="doc doc-heading">
-        <code>DiseasePrioritisationResult</code>
+          <code>DiseasePrioritisationResult</code>
 
   
   <span class="doc doc-labels">
@@ -1313,6 +1314,8 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResu
   
       <p>Store rank data for known diseases.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1324,33 +1327,44 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResu
     </thead>
     <tbody>
         <tr>
-          <td><code>phenopacket_path</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResult.phenopacket_path">phenopacket_path</span></code></td>
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the phenopacket.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the phenopacket.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>disease</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResult.disease">disease</span></code></td>
           <td>
                 <code><span title="pheval.utils.phenopacket_utils.ProbandDisease">ProbandDisease</span></code>
           </td>
-          <td><p>The proband disease.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The proband disease.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>rank</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResult.rank">rank</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>The assigned rank for the disease. Defaults to 0.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The assigned rank for the disease. Defaults to 0.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/prioritisation_result_types.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">39</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/prioritisation_result_types.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">39</span>
 <span class="normal">40</span>
 <span class="normal">41</span>
 <span class="normal">42</span>
@@ -1378,7 +1392,7 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResu
     <span class="n">disease</span><span class="p">:</span> <span class="n">ProbandDisease</span>
     <span class="n">rank</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1398,6 +1412,7 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResu
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1405,7 +1420,7 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResu
 
 
 <h2 id="src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult" class="doc doc-heading">
-        <code>GenePrioritisationResult</code>
+          <code>GenePrioritisationResult</code>
 
   
   <span class="doc doc-labels">
@@ -1420,6 +1435,8 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult"
   
       <p>Store rank data for causative genes.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1431,33 +1448,44 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult"
     </thead>
     <tbody>
         <tr>
-          <td><code>phenopacket_path</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult.phenopacket_path">phenopacket_path</span></code></td>
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the phenopacket.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the phenopacket.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>gene</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult.gene">gene</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>The causative gene.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The causative gene.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>rank</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult.rank">rank</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>The assigned rank for the gene. Defaults to 0.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The assigned rank for the gene. Defaults to 0.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/prioritisation_result_types.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 7</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/prioritisation_result_types.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 7</span>
 <span class="normal"> 8</span>
 <span class="normal"> 9</span>
 <span class="normal">10</span>
@@ -1485,7 +1513,7 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult"
     <span class="n">gene</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">rank</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1505,6 +1533,7 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult"
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1512,7 +1541,7 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult"
 
 
 <h2 id="src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResult" class="doc doc-heading">
-        <code>VariantPrioritisationResult</code>
+          <code>VariantPrioritisationResult</code>
 
   
   <span class="doc doc-labels">
@@ -1527,6 +1556,8 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResu
   
       <p>Store rank data for variants.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1538,33 +1569,44 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResu
     </thead>
     <tbody>
         <tr>
-          <td><code>phenopacket_path</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResult.phenopacket_path">phenopacket_path</span></code></td>
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the phenopacket.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the phenopacket.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>variant</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResult.variant">variant</span></code></td>
           <td>
                 <code><span title="pheval.utils.phenopacket_utils.GenomicVariant">GenomicVariant</span></code>
           </td>
-          <td><p>The genomic variant.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The genomic variant.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>rank</code></td>
+          <td><code><span title="src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResult.rank">rank</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>The assigned rank for the variant. Defaults to 0.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The assigned rank for the variant. Defaults to 0.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/prioritisation_result_types.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">23</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/prioritisation_result_types.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">23</span>
 <span class="normal">24</span>
 <span class="normal">25</span>
 <span class="normal">26</span>
@@ -1592,7 +1634,7 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResu
     <span class="n">variant</span><span class="p">:</span> <span class="n">GenomicVariant</span>
     <span class="n">rank</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1612,6 +1654,7 @@ <h2 id="src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResu
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/analyse/rank_stats/index.html b/api/pheval/analyse/rank_stats/index.html
index 4d02cb19f..94e17f9ef 100644
--- a/api/pheval/analyse/rank_stats/index.html
+++ b/api/pheval/analyse/rank_stats/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/rank_stats/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -659,7 +659,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats" class="md-nav__link">
-    src.pheval.analyse.rank_stats
+    rank_stats
   </a>
   
 </li>
@@ -674,98 +674,98 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.add_rank" class="md-nav__link">
-    add_rank()
+    add_rank
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k" class="md-nav__link">
-    f_beta_score_at_k()
+    f_beta_score_at_k
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k" class="md-nav__link">
-    mean_average_precision_at_k()
+    mean_average_precision_at_k
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumulative_gain" class="md-nav__link">
-    mean_normalised_discounted_cumulative_gain()
+    mean_normalised_discounted_cumulative_gain
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.mean_reciprocal_rank" class="md-nav__link">
-    mean_reciprocal_rank()
+    mean_reciprocal_rank
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_difference" class="md-nav__link">
-    percentage_difference()
+    percentage_difference
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_found" class="md-nav__link">
-    percentage_found()
+    percentage_found
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_rank" class="md-nav__link">
-    percentage_rank()
+    percentage_rank
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_top" class="md-nav__link">
-    percentage_top()
+    percentage_top
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_top10" class="md-nav__link">
-    percentage_top10()
+    percentage_top10
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_top3" class="md-nav__link">
-    percentage_top3()
+    percentage_top3
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_top5" class="md-nav__link">
-    percentage_top5()
+    percentage_top5
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.precision_at_k" class="md-nav__link">
-    precision_at_k()
+    precision_at_k
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank" class="md-nav__link">
-    return_mean_reciprocal_rank()
+    return_mean_reciprocal_rank
   </a>
   
 </li>
@@ -785,21 +785,21 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStatsWriter.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStatsWriter.close" class="md-nav__link">
-    close()
+    close
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="md-nav__link">
-    write_row()
+    write_row
   </a>
   
 </li>
@@ -1352,7 +1352,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats" class="md-nav__link">
-    src.pheval.analyse.rank_stats
+    rank_stats
   </a>
   
 </li>
@@ -1367,98 +1367,98 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.add_rank" class="md-nav__link">
-    add_rank()
+    add_rank
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k" class="md-nav__link">
-    f_beta_score_at_k()
+    f_beta_score_at_k
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k" class="md-nav__link">
-    mean_average_precision_at_k()
+    mean_average_precision_at_k
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumulative_gain" class="md-nav__link">
-    mean_normalised_discounted_cumulative_gain()
+    mean_normalised_discounted_cumulative_gain
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.mean_reciprocal_rank" class="md-nav__link">
-    mean_reciprocal_rank()
+    mean_reciprocal_rank
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_difference" class="md-nav__link">
-    percentage_difference()
+    percentage_difference
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_found" class="md-nav__link">
-    percentage_found()
+    percentage_found
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_rank" class="md-nav__link">
-    percentage_rank()
+    percentage_rank
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_top" class="md-nav__link">
-    percentage_top()
+    percentage_top
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_top10" class="md-nav__link">
-    percentage_top10()
+    percentage_top10
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_top3" class="md-nav__link">
-    percentage_top3()
+    percentage_top3
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.percentage_top5" class="md-nav__link">
-    percentage_top5()
+    percentage_top5
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.precision_at_k" class="md-nav__link">
-    precision_at_k()
+    precision_at_k
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank" class="md-nav__link">
-    return_mean_reciprocal_rank()
+    return_mean_reciprocal_rank
   </a>
   
 </li>
@@ -1478,21 +1478,21 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStatsWriter.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStatsWriter.close" class="md-nav__link">
-    close()
+    close
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="md-nav__link">
-    write_row()
+    write_row
   </a>
   
 </li>
@@ -1527,6 +1527,7 @@ <h1>Rank stats</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.rank_stats"></a>
   <div class="doc doc-contents first">
 
@@ -1546,7 +1547,7 @@ <h1>Rank stats</h1>
 
 
 <h2 id="src.pheval.analyse.rank_stats.RankStats" class="doc doc-heading">
-        <code>RankStats</code>
+          <code>RankStats</code>
 
   
   <span class="doc doc-labels">
@@ -1561,6 +1562,8 @@ <h2 id="src.pheval.analyse.rank_stats.RankStats" class="doc doc-heading">
   
       <p>Store statistics related to ranking.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1572,75 +1575,110 @@ <h2 id="src.pheval.analyse.rank_stats.RankStats" class="doc doc-heading">
     </thead>
     <tbody>
         <tr>
-          <td><code>top</code></td>
+          <td><code><span title="src.pheval.analyse.rank_stats.RankStats.top">top</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>Count of top-ranked matches.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Count of top-ranked matches.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>top3</code></td>
+          <td><code><span title="src.pheval.analyse.rank_stats.RankStats.top3">top3</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>Count of matches within the top 3 ranks.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Count of matches within the top 3 ranks.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>top5</code></td>
+          <td><code><span title="src.pheval.analyse.rank_stats.RankStats.top5">top5</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>Count of matches within the top 5 ranks.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Count of matches within the top 5 ranks.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>top10</code></td>
+          <td><code><span title="src.pheval.analyse.rank_stats.RankStats.top10">top10</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>Count of matches within the top 10 ranks.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Count of matches within the top 10 ranks.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>found</code></td>
+          <td><code><span title="src.pheval.analyse.rank_stats.RankStats.found">found</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>Count of found matches.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Count of found matches.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>total</code></td>
+          <td><code><span title="src.pheval.analyse.rank_stats.RankStats.total">total</span></code></td>
           <td>
                 <code>int</code>
           </td>
-          <td><p>Total count of matches.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Total count of matches.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>reciprocal_ranks</code></td>
+          <td><code><span title="src.pheval.analyse.rank_stats.RankStats.reciprocal_ranks">reciprocal_ranks</span></code></td>
           <td>
                 <code><span title="typing.List">List</span>[float]</code>
           </td>
-          <td><p>List of reciprocal ranks.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of reciprocal ranks.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>relevant_ranks</code></td>
+          <td><code><span title="src.pheval.analyse.rank_stats.RankStats.relevant_ranks">relevant_ranks</span></code></td>
           <td>
                 <code><span title="typing.List">List</span>[<span title="typing.List">List</span>[int]]</code>
           </td>
-          <td><p>Nested list of ranks for the known entities for all cases in a run.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Nested list of ranks for the known entities for all cases in a run.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>mrr</code></td>
+          <td><code><span title="src.pheval.analyse.rank_stats.RankStats.mrr">mrr</span></code></td>
           <td>
                 <code>float</code>
           </td>
-          <td><p>Mean Reciprocal Rank (MRR). Defaults to None.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Mean Reciprocal Rank (MRR). Defaults to None.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 13</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 13</span>
 <span class="normal"> 14</span>
 <span class="normal"> 15</span>
 <span class="normal"> 16</span>
@@ -2170,7 +2208,7 @@ <h2 id="src.pheval.analyse.rank_stats.RankStats" class="doc doc-heading">
             <span class="n">ndcg_scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">ndcg_score</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">ideal_ranking</span><span class="p">]),</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">result_ranks</span><span class="p">])))</span>
         <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">ndcg_scores</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2184,12 +2222,13 @@ <h2 id="src.pheval.analyse.rank_stats.RankStats" class="doc doc-heading">
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.add_rank" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">add_rank</span><span class="p">(</span><span class="n">rank</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">add_rank</span><span class="p">(</span><span class="n">rank</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2198,6 +2237,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.add_rank" class="doc doc-heading
   
       <p>Add rank for matched result.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2214,7 +2255,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.add_rank" class="doc doc-heading
           <td>
                 <code>int</code>
           </td>
-          <td><p>The rank value to be added.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The rank value to be added.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2222,16 +2267,16 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.add_rank" class="doc doc-heading
     </tbody>
   </table>
 
-<details class="notes">
+<details class="notes" open>
   <summary>Notes</summary>
   <p>This method updates the internal attributes of the RankStats object based on the provided rank value.
 It calculates various statistics such as the count of top ranks (1, 3, 5, and 10),
 the total number of ranks found,and the reciprocal rank.
 This function modifies the object's state by updating the internal attributes.</p>
 </details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">39</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">39</span>
 <span class="normal">40</span>
 <span class="normal">41</span>
 <span class="normal">42</span>
@@ -2277,17 +2322,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.add_rank" class="doc doc-heading
     <span class="k">if</span> <span class="n">rank</span> <span class="o">!=</span> <span class="s2">&quot;&quot;</span> <span class="ow">and</span> <span class="n">rank</span> <span class="o">&lt;=</span> <span class="mi">10</span><span class="p">:</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">top10</span> <span class="o">+=</span> <span class="mi">1</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">f_beta_score_at_k</span><span class="p">(</span><span class="n">percentage_at_k</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">f_beta_score_at_k</span><span class="p">(</span><span class="n">percentage_at_k</span><span class="p">,</span> <span class="n">k</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2300,6 +2346,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k" class="doc do
 The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall.
 This method computes the F-beta score at a specific percentage threshold within the top-k predictions.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2316,7 +2364,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k" class="doc do
           <td>
                 <code>float</code>
           </td>
-          <td><p>The percentage of true positive predictions within the top-k.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The percentage of true positive predictions within the top-k.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2326,7 +2378,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k" class="doc do
           <td>
                 <code>int</code>
           </td>
-          <td><p>The number of top predictions to consider.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The number of top predictions to consider.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2334,6 +2390,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k" class="doc do
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2347,15 +2405,19 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k" class="doc do
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The F-beta score at k, ranging from 0.0 to 1.0.
-   A higher score indicates better trade-off between precision and recall.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The F-beta score at k, ranging from 0.0 to 1.0.
+   A higher score indicates better trade-off between precision and recall.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">234</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">234</span>
 <span class="normal">235</span>
 <span class="normal">236</span>
 <span class="normal">237</span>
@@ -2403,17 +2465,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k" class="doc do
         <span class="k">else</span> <span class="mi">0</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">mean_average_precision_at_k</span><span class="p">(</span><span class="n">k</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">mean_average_precision_at_k</span><span class="p">(</span><span class="n">k</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2424,6 +2487,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k" cla
 <p>Mean Average Precision at k (MAP@k) is a performance metric for ranked data.
 It calculates the average precision at k for each result rank and then takes the mean across all queries.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2440,7 +2505,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k" cla
           <td>
                 <code>int</code>
           </td>
-          <td><p>The number of top predictions to consider for precision calculation.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The number of top predictions to consider for precision calculation.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2448,6 +2517,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k" cla
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2461,15 +2532,19 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k" cla
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The Mean Average Precision at k, ranging from 0.0 to 1.0.
-   A higher value indicates better performance in ranking relevant entities higher in the predictions.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The Mean Average Precision at k, ranging from 0.0 to 1.0.
+   A higher value indicates better performance in ranking relevant entities higher in the predictions.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">208</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">208</span>
 <span class="normal">209</span>
 <span class="normal">210</span>
 <span class="normal">211</span>
@@ -2519,17 +2594,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k" cla
             <span class="p">)</span>
     <span class="k">return</span> <span class="p">(</span><span class="mi">1</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">total</span><span class="p">)</span> <span class="o">*</span> <span class="n">cumulative_average_precision_scores</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumulative_gain" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">mean_normalised_discounted_cumulative_gain</span><span class="p">(</span><span class="n">k</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">mean_normalised_discounted_cumulative_gain</span><span class="p">(</span><span class="n">k</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2539,6 +2615,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumul
       <p>Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff.</p>
 <p>NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2555,7 +2633,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumul
           <td>
                 <code>int</code>
           </td>
-          <td><p>The rank cutoff for calculating NDCG.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The rank cutoff for calculating NDCG.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2563,6 +2645,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumul
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2576,14 +2660,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumul
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The mean NDCG score across all query results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The mean NDCG score across all query results.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">259</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">259</span>
 <span class="normal">260</span>
 <span class="normal">261</span>
 <span class="normal">262</span>
@@ -2621,17 +2709,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumul
         <span class="n">ndcg_scores</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">ndcg_score</span><span class="p">(</span><span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">ideal_ranking</span><span class="p">]),</span> <span class="n">np</span><span class="o">.</span><span class="n">asarray</span><span class="p">([</span><span class="n">result_ranks</span><span class="p">])))</span>
     <span class="k">return</span> <span class="n">np</span><span class="o">.</span><span class="n">mean</span><span class="p">(</span><span class="n">ndcg_scores</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_reciprocal_rank" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">mean_reciprocal_rank</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">mean_reciprocal_rank</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2644,6 +2733,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_reciprocal_rank" class="doc
 <p>If the total number of cases differs from the number of found cases,
 this method extends the reciprocal ranks list with zeroes for missing cases.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2657,14 +2748,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_reciprocal_rank" class="doc
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The calculated Mean Reciprocal Rank.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The calculated Mean Reciprocal Rank.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">134</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">134</span>
 <span class="normal">135</span>
 <span class="normal">136</span>
 <span class="normal">137</span>
@@ -2700,17 +2795,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.mean_reciprocal_rank" class="doc
         <span class="k">return</span> <span class="n">mean</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">reciprocal_ranks</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">mean</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">reciprocal_ranks</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_difference" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">percentage_difference</span><span class="p">(</span><span class="n">percentage_value_1</span><span class="p">,</span> <span class="n">percentage_value_2</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">percentage_difference</span><span class="p">(</span><span class="n">percentage_value_1</span><span class="p">,</span> <span class="n">percentage_value_2</span><span class="p">)</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
@@ -2723,6 +2819,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_difference" class="do
   
       <p>Calculate the percentage difference between two percentage values.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2739,7 +2837,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_difference" class="do
           <td>
                 <code>float</code>
           </td>
-          <td><p>The first percentage value.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The first percentage value.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2749,7 +2851,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_difference" class="do
           <td>
                 <code>float</code>
           </td>
-          <td><p>The second percentage value.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The second percentage value.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2757,6 +2863,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_difference" class="do
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2770,14 +2878,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_difference" class="do
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The difference between the two percentage values.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The difference between the two percentage values.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">120</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">120</span>
 <span class="normal">121</span>
 <span class="normal">122</span>
 <span class="normal">123</span>
@@ -2803,17 +2915,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_difference" class="do
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="n">percentage_value_1</span> <span class="o">-</span> <span class="n">percentage_value_2</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_found" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">percentage_found</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">percentage_found</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2822,6 +2935,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_found" class="doc doc
   
       <p>Calculate the percentage of matches found.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2835,14 +2950,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_found" class="doc doc
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The percentage of matches found compared to the total count.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The percentage of matches found compared to the total count.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">111</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">111</span>
 <span class="normal">112</span>
 <span class="normal">113</span>
 <span class="normal">114</span>
@@ -2858,17 +2977,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_found" class="doc doc
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">percentage_rank</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">found</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_rank" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">percentage_rank</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">percentage_rank</span><span class="p">(</span><span class="n">value</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2877,6 +2997,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_rank" class="doc doc-
   
       <p>Calculate the percentage rank.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2893,7 +3015,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_rank" class="doc doc-
           <td>
                 <code>int</code>
           </td>
-          <td><p>The value for which the percentage rank needs to be calculated.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The value for which the percentage rank needs to be calculated.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2901,6 +3027,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_rank" class="doc doc-
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2914,14 +3042,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_rank" class="doc doc-
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The calculated percentage rank based on the provided value and the total count.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The calculated percentage rank based on the provided value and the total count.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">63</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">63</span>
 <span class="normal">64</span>
 <span class="normal">65</span>
 <span class="normal">66</span>
@@ -2943,17 +3075,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_rank" class="doc doc-
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="mi">100</span> <span class="o">*</span> <span class="n">value</span> <span class="o">/</span> <span class="bp">self</span><span class="o">.</span><span class="n">total</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">percentage_top</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">percentage_top</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2962,6 +3095,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top" class="doc doc-h
   
       <p>Calculate the percentage of top matches.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2975,14 +3110,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top" class="doc doc-h
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The percentage of top matches compared to the total count.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The percentage of top matches compared to the total count.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">75</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">75</span>
 <span class="normal">76</span>
 <span class="normal">77</span>
 <span class="normal">78</span>
@@ -2998,17 +3137,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top" class="doc doc-h
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">percentage_rank</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">top</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top10" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">percentage_top10</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">percentage_top10</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3017,6 +3157,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top10" class="doc doc
   
       <p>Calculate the percentage of matches within the top 10.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3030,14 +3172,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top10" class="doc doc
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The percentage of matches within the top 10 compared to the total count.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The percentage of matches within the top 10 compared to the total count.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">102</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">102</span>
 <span class="normal">103</span>
 <span class="normal">104</span>
 <span class="normal">105</span>
@@ -3053,17 +3199,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top10" class="doc doc
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">percentage_rank</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">top10</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top3" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">percentage_top3</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">percentage_top3</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3072,6 +3219,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top3" class="doc doc-
   
       <p>Calculate the percentage of matches within the top 3.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3085,14 +3234,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top3" class="doc doc-
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The percentage of matches within the top 3 compared to the total count.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The percentage of matches within the top 3 compared to the total count.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">84</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">84</span>
 <span class="normal">85</span>
 <span class="normal">86</span>
 <span class="normal">87</span>
@@ -3108,17 +3261,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top3" class="doc doc-
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">percentage_rank</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">top3</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top5" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">percentage_top5</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">percentage_top5</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3127,6 +3281,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top5" class="doc doc-
   
       <p>Calculate the percentage of matches within the top 5.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3140,14 +3296,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top5" class="doc doc-
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The percentage of matches within the top 5 compared to the total count.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The percentage of matches within the top 5 compared to the total count.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 93</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 93</span>
 <span class="normal"> 94</span>
 <span class="normal"> 95</span>
 <span class="normal"> 96</span>
@@ -3163,17 +3323,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.percentage_top5" class="doc doc-
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">percentage_rank</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">top5</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.precision_at_k" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">precision_at_k</span><span class="p">(</span><span class="n">k</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">precision_at_k</span><span class="p">(</span><span class="n">k</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3184,6 +3345,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.precision_at_k" class="doc doc-h
 Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions.
 It measures the accuracy of the top-k predictions made by a model.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3200,7 +3363,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.precision_at_k" class="doc doc-h
           <td>
                 <code>int</code>
           </td>
-          <td><p>The number of top predictions to consider.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The number of top predictions to consider.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3208,6 +3375,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.precision_at_k" class="doc doc-h
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3221,20 +3390,28 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.precision_at_k" class="doc doc-h
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The precision at k, ranging from 0.0 to 1.0.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The precision at k, ranging from 0.0 to 1.0.</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>A higher precision indicates a better performance in identifying relevant items in the top-k predictions.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A higher precision indicates a better performance in identifying relevant items in the top-k predictions.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">168</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">168</span>
 <span class="normal">169</span>
 <span class="normal">170</span>
 <span class="normal">171</span>
@@ -3264,17 +3441,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.precision_at_k" class="doc doc-h
     <span class="n">k_attr</span> <span class="o">=</span> <span class="nb">getattr</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;top</span><span class="si">{</span><span class="n">k</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span> <span class="k">if</span> <span class="n">k</span> <span class="o">&gt;</span> <span class="mi">1</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">top</span>
     <span class="k">return</span> <span class="n">k_attr</span> <span class="o">/</span> <span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">total</span> <span class="o">*</span> <span class="n">k</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">return_mean_reciprocal_rank</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">return_mean_reciprocal_rank</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3285,6 +3463,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank" cla
 <p>If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value.
 Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3298,14 +3478,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank" cla
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>The Mean Reciprocal Rank value.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The Mean Reciprocal Rank value.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">153</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">153</span>
 <span class="normal">154</span>
 <span class="normal">155</span>
 <span class="normal">156</span>
@@ -3333,7 +3517,7 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank" cla
     <span class="k">else</span><span class="p">:</span>
         <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">mean_reciprocal_rank</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -3344,6 +3528,7 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank" cla
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -3351,7 +3536,7 @@ <h3 id="src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank" cla
 
 
 <h2 id="src.pheval.analyse.rank_stats.RankStatsWriter" class="doc doc-heading">
-        <code>RankStatsWriter</code>
+          <code>RankStatsWriter</code>
 
 
 </h2>
@@ -3362,10 +3547,9 @@ <h2 id="src.pheval.analyse.rank_stats.RankStatsWriter" class="doc doc-heading">
   
       <p>Class for writing the rank stats to a file.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">280</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">280</span>
 <span class="normal">281</span>
 <span class="normal">282</span>
 <span class="normal">283</span>
@@ -3635,7 +3819,7 @@ <h2 id="src.pheval.analyse.rank_stats.RankStatsWriter" class="doc doc-heading">
         <span class="k">except</span> <span class="ne">IOError</span><span class="p">:</span>
             <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Error closing &quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">file</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -3649,47 +3833,26 @@ <h2 id="src.pheval.analyse.rank_stats.RankStatsWriter" class="doc doc-heading">
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">file</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">file</span><span class="p">)</span></code>
 
 </h3>
 
 
   <div class="doc doc-contents ">
   
-      <p>Initialise the RankStatsWriter class</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>file</code></td>
-          <td>
-                <code><span title="pathlib.Path">Path</span></code>
-          </td>
-          <td><p>Path to the file where rank stats will be written</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
+      <p>Initialise the RankStatsWriter class
+Args:
+    file (Path): Path to the file where rank stats will be written</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">283</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">283</span>
 <span class="normal">284</span>
 <span class="normal">285</span>
 <span class="normal">286</span>
@@ -3797,17 +3960,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.__init__" class="doc doc-h
         <span class="p">]</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.close" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">close</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">close</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3816,6 +3980,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.close" class="doc doc-head
   
       <p>Close the file used for writing rank statistics.</p>
 
+
+
   <p><strong>Raises:</strong></p>
   <table>
     <thead>
@@ -3829,14 +3995,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.close" class="doc doc-head
           <td>
                 <code>IOError</code>
           </td>
-          <td><p>If there's an error while closing the file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If there's an error while closing the file.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">404</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">404</span>
 <span class="normal">405</span>
 <span class="normal">406</span>
 <span class="normal">407</span>
@@ -3858,17 +4028,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.close" class="doc doc-head
     <span class="k">except</span> <span class="ne">IOError</span><span class="p">:</span>
         <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Error closing &quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">file</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">write_row</span><span class="p">(</span><span class="n">directory</span><span class="p">,</span> <span class="n">rank_stats</span><span class="p">,</span> <span class="n">binary_classification</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">write_row</span><span class="p">(</span><span class="n">directory</span><span class="p">,</span> <span class="n">rank_stats</span><span class="p">,</span> <span class="n">binary_classification</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3877,6 +4048,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="doc doc-
   
       <p>Write summary rank statistics row for a run to the file.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3893,7 +4066,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the results directory corresponding to the run</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the results directory corresponding to the run</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3903,7 +4080,11 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="doc doc-
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.analyse.rank_stats.RankStats" href="#src.pheval.analyse.rank_stats.RankStats">RankStats</a></code>
           </td>
-          <td><p>RankStats instance containing rank statistics corresponding to the run</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>RankStats instance containing rank statistics corresponding to the run</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3911,6 +4092,8 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="doc doc-
     </tbody>
   </table>
 
+
+
   <p><strong>Raises:</strong></p>
   <table>
     <thead>
@@ -3924,14 +4107,18 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="doc doc-
           <td>
                 <code>IOError</code>
           </td>
-          <td><p>If there is an error writing to the file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If there is an error writing to the file.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">338</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/rank_stats.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">338</span>
 <span class="normal">339</span>
 <span class="normal">340</span>
 <span class="normal">341</span>
@@ -4061,7 +4248,7 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="doc doc-
     <span class="k">except</span> <span class="ne">IOError</span><span class="p">:</span>
         <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;Error writing &quot;</span><span class="p">,</span> <span class="bp">self</span><span class="o">.</span><span class="n">file</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -4072,6 +4259,7 @@ <h3 id="src.pheval.analyse.rank_stats.RankStatsWriter.write_row" class="doc doc-
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/analyse/run_data_parser/index.html b/api/pheval/analyse/run_data_parser/index.html
index 8c5ac0f1d..b9a825be0 100644
--- a/api/pheval/analyse/run_data_parser/index.html
+++ b/api/pheval/analyse/run_data_parser/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/run_data_parser/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -673,7 +673,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.run_data_parser" class="md-nav__link">
-    src.pheval.analyse.run_data_parser
+    run_data_parser
   </a>
   
 </li>
@@ -687,7 +687,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.run_data_parser.parse_run_data_text_file" class="md-nav__link">
-    parse_run_data_text_file()
+    parse_run_data_text_file
   </a>
   
 </li>
@@ -1221,7 +1221,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.run_data_parser" class="md-nav__link">
-    src.pheval.analyse.run_data_parser
+    run_data_parser
   </a>
   
 </li>
@@ -1235,7 +1235,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.run_data_parser.parse_run_data_text_file" class="md-nav__link">
-    parse_run_data_text_file()
+    parse_run_data_text_file
   </a>
   
 </li>
@@ -1265,6 +1265,7 @@ <h1>Run data parser</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.run_data_parser"></a>
   <div class="doc doc-contents first">
 
@@ -1284,7 +1285,7 @@ <h1>Run data parser</h1>
 
 
 <h2 id="src.pheval.analyse.run_data_parser.TrackInputOutputDirectories" class="doc doc-heading">
-        <code>TrackInputOutputDirectories</code>
+          <code>TrackInputOutputDirectories</code>
 
   
   <span class="doc doc-labels">
@@ -1299,6 +1300,8 @@ <h2 id="src.pheval.analyse.run_data_parser.TrackInputOutputDirectories" class="d
   
       <p>Track the input phenopacket test data for a corresponding pheval output directory.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1310,26 +1313,33 @@ <h2 id="src.pheval.analyse.run_data_parser.TrackInputOutputDirectories" class="d
     </thead>
     <tbody>
         <tr>
-          <td><code>phenopacket_dir</code></td>
+          <td><code><span title="src.pheval.analyse.run_data_parser.TrackInputOutputDirectories.phenopacket_dir">phenopacket_dir</span></code></td>
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing input phenopackets.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing input phenopackets.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>results_dir</code></td>
+          <td><code><span title="src.pheval.analyse.run_data_parser.TrackInputOutputDirectories.results_dir">results_dir</span></code></td>
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing output results from pheval.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing output results from pheval.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/run_data_parser.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 8</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/run_data_parser.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 8</span>
 <span class="normal"> 9</span>
 <span class="normal">10</span>
 <span class="normal">11</span>
@@ -1353,7 +1363,7 @@ <h2 id="src.pheval.analyse.run_data_parser.TrackInputOutputDirectories" class="d
     <span class="n">phenopacket_dir</span><span class="p">:</span> <span class="n">Path</span>
     <span class="n">results_dir</span><span class="p">:</span> <span class="n">Path</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1373,15 +1383,17 @@ <h2 id="src.pheval.analyse.run_data_parser.TrackInputOutputDirectories" class="d
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.run_data_parser.parse_run_data_text_file" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">parse_run_data_text_file</span><span class="p">(</span><span class="n">run_data_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">parse_run_data_text_file</span><span class="p">(</span><span class="n">run_data_path</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1390,6 +1402,8 @@ <h2 id="src.pheval.analyse.run_data_parser.parse_run_data_text_file" class="doc
   
       <p>Parse run data .txt file returning a list of input phenopacket and corresponding output directories.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1406,7 +1420,11 @@ <h2 id="src.pheval.analyse.run_data_parser.parse_run_data_text_file" class="doc
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the run data .txt file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the run data .txt file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1414,6 +1432,8 @@ <h2 id="src.pheval.analyse.run_data_parser.parse_run_data_text_file" class="doc
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1427,26 +1447,34 @@ <h2 id="src.pheval.analyse.run_data_parser.parse_run_data_text_file" class="doc
           <td>
                 <code><span title="typing.List">List</span>[<a class="autorefs autorefs-internal" title="src.pheval.analyse.run_data_parser.TrackInputOutputDirectories" href="#src.pheval.analyse.run_data_parser.TrackInputOutputDirectories">TrackInputOutputDirectories</a>]</code>
           </td>
-          <td><p>List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing</p>
+            </div>
+          </td>
         </tr>
         <tr>
           <td>
                 <code><span title="typing.List">List</span>[<a class="autorefs autorefs-internal" title="src.pheval.analyse.run_data_parser.TrackInputOutputDirectories" href="#src.pheval.analyse.run_data_parser.TrackInputOutputDirectories">TrackInputOutputDirectories</a>]</code>
           </td>
-          <td><p>input test data directories and their corresponding output directories.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>input test data directories and their corresponding output directories.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-<details class="notes">
+<details class="notes" open>
   <summary>Notes</summary>
   <p>The run data .txt file should be formatted with tab-separated values. Each row should contain
 two columns: the first column representing the input test data phenopacket directory, and
 the second column representing the corresponding run output directory.</p>
 </details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/run_data_parser.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">22</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/run_data_parser.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">22</span>
 <span class="normal">23</span>
 <span class="normal">24</span>
 <span class="normal">25</span>
@@ -1492,7 +1520,7 @@ <h2 id="src.pheval.analyse.run_data_parser.parse_run_data_text_file" class="doc
         <span class="p">)</span>
     <span class="k">return</span> <span class="n">run_data_list</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/analyse/variant_prioritisation_analysis/index.html b/api/pheval/analyse/variant_prioritisation_analysis/index.html
index c62b692a2..659cae035 100644
--- a/api/pheval/analyse/variant_prioritisation_analysis/index.html
+++ b/api/pheval/analyse/variant_prioritisation_analysis/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/analyse/variant_prioritisation_analysis/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -687,7 +687,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis" class="md-nav__link">
-    src.pheval.analyse.variant_prioritisation_analysis
+    variant_prioritisation_analysis
   </a>
   
 </li>
@@ -702,14 +702,14 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.assess_variant_prioritisation" class="md-nav__link">
-    assess_variant_prioritisation()
+    assess_variant_prioritisation
   </a>
   
 </li>
@@ -721,14 +721,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_variant_prioritisation" class="md-nav__link">
-    assess_phenopacket_variant_prioritisation()
+    assess_phenopacket_variant_prioritisation
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_prioritisation" class="md-nav__link">
-    benchmark_variant_prioritisation()
+    benchmark_variant_prioritisation
   </a>
   
 </li>
@@ -1248,7 +1248,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis" class="md-nav__link">
-    src.pheval.analyse.variant_prioritisation_analysis
+    variant_prioritisation_analysis
   </a>
   
 </li>
@@ -1263,14 +1263,14 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.assess_variant_prioritisation" class="md-nav__link">
-    assess_variant_prioritisation()
+    assess_variant_prioritisation
   </a>
   
 </li>
@@ -1282,14 +1282,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_variant_prioritisation" class="md-nav__link">
-    assess_phenopacket_variant_prioritisation()
+    assess_phenopacket_variant_prioritisation
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_prioritisation" class="md-nav__link">
-    benchmark_variant_prioritisation()
+    benchmark_variant_prioritisation
   </a>
   
 </li>
@@ -1319,6 +1319,7 @@ <h1>Variant prioritisation analysis</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.analyse.variant_prioritisation_analysis"></a>
   <div class="doc doc-contents first">
 
@@ -1338,7 +1339,7 @@ <h1>Variant prioritisation analysis</h1>
 
 
 <h2 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation" class="doc doc-heading">
-        <code>AssessVariantPrioritisation</code>
+          <code>AssessVariantPrioritisation</code>
 
 
 </h2>
@@ -1349,10 +1350,9 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
   
       <p>Class for assessing variant prioritisation based on thresholds and scoring orders.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 17</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 17</span>
 <span class="normal"> 18</span>
 <span class="normal"> 19</span>
 <span class="normal"> 20</span>
@@ -1702,7 +1702,7 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
             <span class="bp">self</span><span class="o">.</span><span class="n">standardised_variant_results</span><span class="p">,</span> <span class="n">relevant_ranks</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1716,12 +1716,13 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">results_dir</span><span class="p">,</span> <span class="n">standardised_variant_results</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">proband_causative_variants</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">results_dir</span><span class="p">,</span> <span class="n">standardised_variant_results</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">proband_causative_variants</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1730,6 +1731,8 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
   
       <p>Initialise AssessVariantPrioritisation class</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1746,7 +1749,11 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the phenopacket file</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the phenopacket file</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1756,7 +1763,11 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the results directory</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the results directory</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1766,7 +1777,11 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.post_processing.post_processing.RankedPhEvalVariantResult">RankedPhEvalVariantResult</span>]</code>
           </td>
-          <td><p>List of ranked PhEval variant results</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of ranked PhEval variant results</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1776,7 +1791,11 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
           <td>
                 <code>float</code>
           </td>
-          <td><p>Threshold for scores</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Threshold for scores</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1786,7 +1805,11 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
           <td>
                 <code>str</code>
           </td>
-          <td><p>Score order for results, either ascending or descending</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Score order for results, either ascending or descending</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1796,7 +1819,11 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.utils.phenopacket_utils.GenomicVariant">GenomicVariant</span>]</code>
           </td>
-          <td><p>List of proband variants</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of proband variants</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1804,9 +1831,9 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">20</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">20</span>
 <span class="normal">21</span>
 <span class="normal">22</span>
 <span class="normal">23</span>
@@ -1860,17 +1887,18 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
     <span class="bp">self</span><span class="o">.</span><span class="n">score_order</span> <span class="o">=</span> <span class="n">score_order</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">proband_causative_variants</span> <span class="o">=</span> <span class="n">proband_causative_variants</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.assess_variant_prioritisation" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">assess_variant_prioritisation</span><span class="p">(</span><span class="n">rank_stats</span><span class="p">,</span> <span class="n">rank_records</span><span class="p">,</span> <span class="n">binary_classification_stats</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">assess_variant_prioritisation</span><span class="p">(</span><span class="n">rank_stats</span><span class="p">,</span> <span class="n">rank_records</span><span class="p">,</span> <span class="n">binary_classification_stats</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1881,6 +1909,8 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
 <p>This method assesses the prioritisation of variants based on the provided criteria
 and records ranks using a PrioritisationRankRecorder.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1897,7 +1927,11 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
           <td>
                 <code><span title="pheval.analyse.rank_stats.RankStats">RankStats</span></code>
           </td>
-          <td><p>RankStats class instance</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>RankStats class instance</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1907,7 +1941,11 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>A defaultdict to store the correct ranked results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A defaultdict to store the correct ranked results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1917,7 +1955,11 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
           <td>
                 <code><span title="pheval.analyse.binary_classification_stats.BinaryClassificationStats">BinaryClassificationStats</span></code>
           </td>
-          <td><p>BinaryClassificationStats class instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>BinaryClassificationStats class instance.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1925,9 +1967,9 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">142</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">142</span>
 <span class="normal">143</span>
 <span class="normal">144</span>
 <span class="normal">145</span>
@@ -2027,7 +2069,7 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
         <span class="bp">self</span><span class="o">.</span><span class="n">standardised_variant_results</span><span class="p">,</span> <span class="n">relevant_ranks</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2038,15 +2080,17 @@ <h3 id="src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioriti
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_variant_prioritisation" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">assess_phenopacket_variant_prioritisation</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">results_dir_and_input</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">variant_rank_stats</span><span class="p">,</span> <span class="n">variant_rank_comparison</span><span class="p">,</span> <span class="n">variant_binary_classification_stats</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">assess_phenopacket_variant_prioritisation</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">results_dir_and_input</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">variant_rank_stats</span><span class="p">,</span> <span class="n">variant_rank_comparison</span><span class="p">,</span> <span class="n">variant_binary_classification_stats</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2056,6 +2100,8 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
       <p>Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results
 against the recorded causative variants for a proband in the Phenopacket.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2072,7 +2118,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the Phenopacket.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the Phenopacket.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2082,7 +2132,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
           <td>
                 <code>str</code>
           </td>
-          <td><p>The order in which scores are arranged, either ascending or descending.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The order in which scores are arranged, either ascending or descending.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2092,7 +2146,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
           <td>
                 <code><span title="pheval.analyse.run_data_parser.TrackInputOutputDirectories">TrackInputOutputDirectories</span></code>
           </td>
-          <td><p>Input and output directories.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Input and output directories.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2102,7 +2160,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
           <td>
                 <code>float</code>
           </td>
-          <td><p>Threshold for assessment.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Threshold for assessment.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2112,7 +2174,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
           <td>
                 <code><span title="pheval.analyse.rank_stats.RankStats">RankStats</span></code>
           </td>
-          <td><p>RankStats class instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>RankStats class instance.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2122,7 +2188,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>Default dictionary for variant rank comparisons.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Default dictionary for variant rank comparisons.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2132,7 +2202,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
           <td>
                 <code><span title="pheval.analyse.binary_classification_stats.BinaryClassificationStats">BinaryClassificationStats</span></code>
           </td>
-          <td><p>BinaryClassificationStats class instance.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>BinaryClassificationStats class instance.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2140,9 +2214,9 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">209</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">209</span>
 <span class="normal">210</span>
 <span class="normal">211</span>
 <span class="normal">212</span>
@@ -2216,17 +2290,18 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_va
         <span class="n">variant_rank_stats</span><span class="p">,</span> <span class="n">variant_rank_comparison</span><span class="p">,</span> <span class="n">variant_binary_classification_stats</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_prioritisation" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">benchmark_variant_prioritisation</span><span class="p">(</span><span class="n">results_directory_and_input</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">variant_rank_comparison</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">benchmark_variant_prioritisation</span><span class="p">(</span><span class="n">results_directory_and_input</span><span class="p">,</span> <span class="n">score_order</span><span class="p">,</span> <span class="n">threshold</span><span class="p">,</span> <span class="n">variant_rank_comparison</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2235,6 +2310,8 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_pri
   
       <p>Benchmark a directory based on variant prioritisation results.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2251,7 +2328,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_pri
           <td>
                 <code><span title="pheval.analyse.run_data_parser.TrackInputOutputDirectories">TrackInputOutputDirectories</span></code>
           </td>
-          <td><p>Input and output directories.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Input and output directories.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2261,7 +2342,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_pri
           <td>
                 <code>str</code>
           </td>
-          <td><p>The order in which scores are arranged.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The order in which scores are arranged.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2271,7 +2356,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_pri
           <td>
                 <code>float</code>
           </td>
-          <td><p>Threshold for assessment.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Threshold for assessment.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2281,7 +2370,11 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_pri
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>Default dictionary for variant rank comparisons.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Default dictionary for variant rank comparisons.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2289,6 +2382,8 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_pri
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2301,19 +2396,27 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_pri
         <tr>
 <td><code>BenchmarkRunResults</code></td>          <td>
           </td>
-          <td><p>An object containing benchmarking results for variant prioritisation,</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>An object containing benchmarking results for variant prioritisation,</p>
+            </div>
+          </td>
         </tr>
         <tr>
 <td></td>          <td>
           </td>
-          <td><p>including ranks and rank statistics for the benchmarked directory.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>including ranks and rank statistics for the benchmarked directory.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">248</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/analyse/variant_prioritisation_analysis.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">248</span>
 <span class="normal">249</span>
 <span class="normal">250</span>
 <span class="normal">251</span>
@@ -2387,7 +2490,7 @@ <h2 id="src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_pri
         <span class="n">binary_classification_stats</span><span class="o">=</span><span class="n">variant_binary_classification_stats</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/cli/index.html b/api/pheval/cli/index.html
index 033f59c4a..a4eabed6c 100644
--- a/api/pheval/cli/index.html
+++ b/api/pheval/cli/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/cli/">
       
       <link rel="icon" href="../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -1856,13 +1856,13 @@ <h6 id="generate-stats-plot">generate-stats-plot</h6>
 <tr>
 <td><code>--variant-analysis</code> / <code>--no-variant-analysis</code></td>
 <td>boolean</td>
-<td>Specify analysis for variant prioritisation NOTE: This argument is mutually exclusive with  arguments: [gene_analysis, disease_analysis].</td>
+<td>Specify analysis for variant prioritisation NOTE: This argument is mutually exclusive with  arguments: [disease_analysis, gene_analysis].</td>
 <td><code>False</code></td>
 </tr>
 <tr>
 <td><code>--disease-analysis</code> / <code>--no-disease-analysis</code></td>
 <td>boolean</td>
-<td>Specify analysis for disease prioritisation NOTE: This argument is mutually exclusive with  arguments: [gene_analysis, variant_analysis].</td>
+<td>Specify analysis for disease prioritisation NOTE: This argument is mutually exclusive with  arguments: [variant_analysis, gene_analysis].</td>
 <td><code>False</code></td>
 </tr>
 <tr>
diff --git a/api/pheval/config_parser/index.html b/api/pheval/config_parser/index.html
index 8c7dec36b..3905e04c6 100644
--- a/api/pheval/config_parser/index.html
+++ b/api/pheval/config_parser/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/config_parser/">
       
       <link rel="icon" href="../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -421,7 +421,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.config_parser" class="md-nav__link">
-    src.pheval.config_parser
+    config_parser
   </a>
   
 </li>
@@ -435,7 +435,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.config_parser.parse_input_dir_config" class="md-nav__link">
-    parse_input_dir_config()
+    parse_input_dir_config
   </a>
   
 </li>
@@ -1219,7 +1219,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.config_parser" class="md-nav__link">
-    src.pheval.config_parser
+    config_parser
   </a>
   
 </li>
@@ -1233,7 +1233,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.config_parser.parse_input_dir_config" class="md-nav__link">
-    parse_input_dir_config()
+    parse_input_dir_config
   </a>
   
 </li>
@@ -1263,6 +1263,7 @@ <h1>Config parser</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.config_parser"></a>
   <div class="doc doc-contents first">
 
@@ -1282,7 +1283,7 @@ <h1>Config parser</h1>
 
 
 <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
-        <code>InputDirConfig</code>
+          <code>InputDirConfig</code>
 
   
   <span class="doc doc-labels">
@@ -1297,6 +1298,8 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
   
       <p>Class for defining the fields within the input directory config.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1313,7 +1316,11 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>Name of the tool implementation (e.g. exomiser/phen2gene)</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Name of the tool implementation (e.g. exomiser/phen2gene)</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1323,7 +1330,11 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>Version of the tool implementation</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Version of the tool implementation</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1333,7 +1344,11 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Whether to extract prioritised variants from results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Whether to extract prioritised variants from results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1343,7 +1358,11 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Whether to extract prioritised genes from results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Whether to extract prioritised genes from results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1353,7 +1372,11 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Whether to extract prioritised diseases from results.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Whether to extract prioritised diseases from results.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1363,7 +1386,11 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
           <td>
                 <code><span title="typing.Any">Any</span></code>
           </td>
-          <td><p>Tool specific configurations</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Tool specific configurations</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1371,10 +1398,9 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/config_parser.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">10</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/config_parser.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">10</span>
 <span class="normal">11</span>
 <span class="normal">12</span>
 <span class="normal">13</span>
@@ -1420,7 +1446,7 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
     <span class="n">disease_analysis</span><span class="p">:</span> <span class="nb">bool</span>
     <span class="n">tool_specific_configuration_options</span><span class="p">:</span> <span class="n">Any</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1440,15 +1466,17 @@ <h2 id="src.pheval.config_parser.InputDirConfig" class="doc doc-heading">
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.config_parser.parse_input_dir_config" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">parse_input_dir_config</span><span class="p">(</span><span class="n">input_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">parse_input_dir_config</span><span class="p">(</span><span class="n">input_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1457,9 +1485,9 @@ <h2 id="src.pheval.config_parser.parse_input_dir_config" class="doc doc-heading"
   
       <p>Reads the config file.</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/config_parser.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">35</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/config_parser.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">35</span>
 <span class="normal">36</span>
 <span class="normal">37</span>
 <span class="normal">38</span>
@@ -1471,7 +1499,7 @@ <h2 id="src.pheval.config_parser.parse_input_dir_config" class="doc doc-heading"
     <span class="n">config_file</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
     <span class="k">return</span> <span class="n">from_yaml</span><span class="p">(</span><span class="n">InputDirConfig</span><span class="p">,</span> <span class="n">yaml</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">config</span><span class="p">))</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/constants/index.html b/api/pheval/constants/index.html
index f9348e583..db336da5e 100644
--- a/api/pheval/constants/index.html
+++ b/api/pheval/constants/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/constants/">
       
       <link rel="icon" href="../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -435,7 +435,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.constants" class="md-nav__link">
-    src.pheval.constants
+    constants
   </a>
   
 </li>
@@ -1205,7 +1205,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.constants" class="md-nav__link">
-    src.pheval.constants
+    constants
   </a>
   
 </li>
@@ -1235,6 +1235,7 @@ <h1>Constants</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.constants"></a>
   <div class="doc doc-contents first">
 
diff --git a/api/pheval/infra/exomiserdb/index.html b/api/pheval/infra/exomiserdb/index.html
index a83fbbdc2..8bbb60f9d 100644
--- a/api/pheval/infra/exomiserdb/index.html
+++ b/api/pheval/infra/exomiserdb/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/infra/exomiserdb/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -734,7 +734,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.infra.exomiserdb" class="md-nav__link">
-    src.pheval.infra.exomiserdb
+    exomiserdb
   </a>
   
 </li>
@@ -749,7 +749,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.infra.exomiserdb.DBConnection.get_connection" class="md-nav__link">
-    get_connection()
+    get_connection
   </a>
   
 </li>
@@ -769,7 +769,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.infra.exomiserdb.DBConnector.create_connection" class="md-nav__link">
-    create_connection()
+    create_connection
   </a>
   
 </li>
@@ -789,7 +789,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="md-nav__link">
-    import_from_semsim_file()
+    import_from_semsim_file
   </a>
   
 </li>
@@ -1267,7 +1267,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.infra.exomiserdb" class="md-nav__link">
-    src.pheval.infra.exomiserdb
+    exomiserdb
   </a>
   
 </li>
@@ -1282,7 +1282,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.infra.exomiserdb.DBConnection.get_connection" class="md-nav__link">
-    get_connection()
+    get_connection
   </a>
   
 </li>
@@ -1302,7 +1302,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.infra.exomiserdb.DBConnector.create_connection" class="md-nav__link">
-    create_connection()
+    create_connection
   </a>
   
 </li>
@@ -1322,7 +1322,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="md-nav__link">
-    import_from_semsim_file()
+    import_from_semsim_file
   </a>
   
 </li>
@@ -1357,6 +1357,7 @@ <h1>Exomiserdb</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.infra.exomiserdb"></a>
   <div class="doc doc-contents first">
 
@@ -1376,7 +1377,7 @@ <h1>Exomiserdb</h1>
 
 
 <h2 id="src.pheval.infra.exomiserdb.DBConnection" class="doc doc-heading">
-        <code>DBConnection</code>
+          <code>DBConnection</code>
 
 
 </h2>
@@ -1385,10 +1386,9 @@ <h2 id="src.pheval.infra.exomiserdb.DBConnection" class="doc doc-heading">
   <div class="doc doc-contents ">
 
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">43</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">43</span>
 <span class="normal">44</span>
 <span class="normal">45</span>
 <span class="normal">46</span>
@@ -1424,7 +1424,7 @@ <h2 id="src.pheval.infra.exomiserdb.DBConnection" class="doc doc-heading">
         <span class="n">connection</span> <span class="o">=</span> <span class="bp">cls</span><span class="o">.</span><span class="n">get_connection</span><span class="p">()</span>
         <span class="k">return</span> <span class="n">connection</span><span class="o">.</span><span class="n">cursor</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1438,12 +1438,13 @@ <h2 id="src.pheval.infra.exomiserdb.DBConnection" class="doc doc-heading">
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.infra.exomiserdb.DBConnection.get_connection" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">get_connection</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">get_connection</span><span class="p">()</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-classmethod"><code>classmethod</code></small>
@@ -1456,9 +1457,9 @@ <h3 id="src.pheval.infra.exomiserdb.DBConnection.get_connection" class="doc doc-
   
       <p>Creates return new Singleton database connection</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">49</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">49</span>
 <span class="normal">50</span>
 <span class="normal">51</span>
 <span class="normal">52</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@classmethod</span>
@@ -1466,7 +1467,7 @@ <h3 id="src.pheval.infra.exomiserdb.DBConnection.get_connection" class="doc doc-
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;Creates return new Singleton database connection&quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="n">DBConnection</span><span class="o">.</span><span class="n">connection</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -1477,6 +1478,7 @@ <h3 id="src.pheval.infra.exomiserdb.DBConnection.get_connection" class="doc doc-
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1484,7 +1486,7 @@ <h3 id="src.pheval.infra.exomiserdb.DBConnection.get_connection" class="doc doc-
 
 
 <h2 id="src.pheval.infra.exomiserdb.DBConnector" class="doc doc-heading">
-        <code>DBConnector</code>
+          <code>DBConnector</code>
 
 
 </h2>
@@ -1493,10 +1495,9 @@ <h2 id="src.pheval.infra.exomiserdb.DBConnector" class="doc doc-heading">
   <div class="doc doc-contents ">
 
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">14</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">14</span>
 <span class="normal">15</span>
 <span class="normal">16</span>
 <span class="normal">17</span>
@@ -1550,7 +1551,7 @@ <h2 id="src.pheval.infra.exomiserdb.DBConnector" class="doc doc-heading">
     <span class="k">def</span> <span class="fm">__exit__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="n">other</span><span class="p">):</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">dbconn</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1564,12 +1565,13 @@ <h2 id="src.pheval.infra.exomiserdb.DBConnector" class="doc doc-heading">
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.infra.exomiserdb.DBConnector.create_connection" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_connection</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">create_connection</span><span class="p">()</span></code>
 
 </h3>
 
@@ -1578,9 +1580,9 @@ <h3 id="src.pheval.infra.exomiserdb.DBConnector.create_connection" class="doc do
   
       <p>creates h2 database connection</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">26</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">26</span>
 <span class="normal">27</span>
 <span class="normal">28</span>
 <span class="normal">29</span>
@@ -1596,7 +1598,7 @@ <h3 id="src.pheval.infra.exomiserdb.DBConnector.create_connection" class="doc do
         <span class="bp">self</span><span class="o">.</span><span class="n">jar</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -1607,6 +1609,7 @@ <h3 id="src.pheval.infra.exomiserdb.DBConnector.create_connection" class="doc do
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1614,7 +1617,7 @@ <h3 id="src.pheval.infra.exomiserdb.DBConnector.create_connection" class="doc do
 
 
 <h2 id="src.pheval.infra.exomiserdb.ExomiserDB" class="doc doc-heading">
-        <code>ExomiserDB</code>
+          <code>ExomiserDB</code>
 
 
 </h2>
@@ -1623,10 +1626,9 @@ <h2 id="src.pheval.infra.exomiserdb.ExomiserDB" class="doc doc-heading">
   <div class="doc doc-contents ">
 
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 63</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 63</span>
 <span class="normal"> 64</span>
 <span class="normal"> 65</span>
 <span class="normal"> 66</span>
@@ -1710,7 +1712,7 @@ <h2 id="src.pheval.infra.exomiserdb.ExomiserDB" class="doc doc-heading">
 
                 <span class="n">batches</span> <span class="o">=</span> <span class="n">reader</span><span class="o">.</span><span class="n">next_batches</span><span class="p">(</span><span class="n">batch_length</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1724,12 +1726,13 @@ <h2 id="src.pheval.infra.exomiserdb.ExomiserDB" class="doc doc-heading">
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">import_from_semsim_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">subject_prefix</span><span class="p">,</span> <span class="n">object_prefix</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">import_from_semsim_file</span><span class="p">(</span><span class="n">input_file</span><span class="p">,</span> <span class="n">subject_prefix</span><span class="p">,</span> <span class="n">object_prefix</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1738,6 +1741,8 @@ <h3 id="src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="d
   
       <p>imports semsim tsv profile into exomiser phenotype database</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1754,7 +1759,11 @@ <h3 id="src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="d
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>semsim profile</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>semsim profile</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1764,7 +1773,11 @@ <h3 id="src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="d
           <td>
                 <code>str</code>
           </td>
-          <td><p>Subject Prefix. e.g HP</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Subject Prefix. e.g HP</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1774,7 +1787,11 @@ <h3 id="src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="d
           <td>
                 <code>str</code>
           </td>
-          <td><p>Object Prefix. e.g MP</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Object Prefix. e.g MP</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1782,9 +1799,9 @@ <h3 id="src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="d
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 77</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/infra/exomiserdb.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 77</span>
 <span class="normal"> 78</span>
 <span class="normal"> 79</span>
 <span class="normal"> 80</span>
@@ -1840,7 +1857,7 @@ <h3 id="src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="d
 
             <span class="n">batches</span> <span class="o">=</span> <span class="n">reader</span><span class="o">.</span><span class="n">next_batches</span><span class="p">(</span><span class="n">batch_length</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -1851,6 +1868,7 @@ <h3 id="src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file" class="d
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/post_processing/post_processing/index.html b/api/pheval/post_processing/post_processing/index.html
index 925411ba2..37e49c1c7 100644
--- a/api/pheval/post_processing/post_processing/index.html
+++ b/api/pheval/post_processing/post_processing/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/post_processing/post_processing/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -781,7 +781,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing" class="md-nav__link">
-    src.pheval.post_processing.post_processing
+    post_processing
   </a>
   
 </li>
@@ -824,7 +824,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.from_disease_result" class="md-nav__link">
-    from_disease_result()
+    from_disease_result
   </a>
   
 </li>
@@ -844,7 +844,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_gene_result" class="md-nav__link">
-    from_gene_result()
+    from_gene_result
   </a>
   
 </li>
@@ -864,7 +864,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.from_variant_result" class="md-nav__link">
-    from_variant_result()
+    from_variant_result
   </a>
   
 </li>
@@ -884,14 +884,14 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.ResultSorter.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_results" class="md-nav__link">
-    sort_pheval_results()
+    sort_pheval_results
   </a>
   
 </li>
@@ -930,14 +930,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.calculate_end_pos" class="md-nav__link">
-    calculate_end_pos()
+    calculate_end_pos
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.generate_pheval_result" class="md-nav__link">
-    generate_pheval_result()
+    generate_pheval_result
   </a>
   
 </li>
@@ -1363,7 +1363,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing" class="md-nav__link">
-    src.pheval.post_processing.post_processing
+    post_processing
   </a>
   
 </li>
@@ -1406,7 +1406,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.from_disease_result" class="md-nav__link">
-    from_disease_result()
+    from_disease_result
   </a>
   
 </li>
@@ -1426,7 +1426,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_gene_result" class="md-nav__link">
-    from_gene_result()
+    from_gene_result
   </a>
   
 </li>
@@ -1446,7 +1446,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.from_variant_result" class="md-nav__link">
-    from_variant_result()
+    from_variant_result
   </a>
   
 </li>
@@ -1466,14 +1466,14 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.ResultSorter.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_results" class="md-nav__link">
-    sort_pheval_results()
+    sort_pheval_results
   </a>
   
 </li>
@@ -1512,14 +1512,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.calculate_end_pos" class="md-nav__link">
-    calculate_end_pos()
+    calculate_end_pos
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.generate_pheval_result" class="md-nav__link">
-    generate_pheval_result()
+    generate_pheval_result
   </a>
   
 </li>
@@ -1549,6 +1549,7 @@ <h1>Post processing</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.post_processing.post_processing"></a>
   <div class="doc doc-contents first">
 
@@ -1568,7 +1569,7 @@ <h1>Post processing</h1>
 
 
 <h2 id="src.pheval.post_processing.post_processing.PhEvalDiseaseResult" class="doc doc-heading">
-        <code>PhEvalDiseaseResult</code>
+          <code>PhEvalDiseaseResult</code>
 
   
   <span class="doc doc-labels">
@@ -1579,66 +1580,23 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalDiseaseResult" class="d
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
-
-  
-      <p>Minimal data required from tool-specific output for disease prioritisation</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>disease_name</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>Disease name for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>disease_identifier</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>Identifier for the disease result entry in the OMIM namespace</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>score</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>Score for the disease result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>While we recommend providing the disease identifier in the OMIM namespace,
-any matching format used in Phenopacket interpretations is acceptable for result matching purposes
-in the analysis.</p>
-</details>
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">131</span>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
+
+  
+      <p>Minimal data required from tool-specific output for disease prioritisation
+Args:
+    disease_name (str): Disease name for the result entry
+    disease_identifier (str): Identifier for the disease result entry in the OMIM namespace
+    score (str): Score for the disease result entry
+Notes:
+    While we recommend providing the disease identifier in the OMIM namespace,
+    any matching format used in Phenopacket interpretations is acceptable for result matching purposes
+    in the analysis.</p>
+
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">131</span>
 <span class="normal">132</span>
 <span class="normal">133</span>
 <span class="normal">134</span>
@@ -1670,7 +1628,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalDiseaseResult" class="d
     <span class="n">disease_identifier</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">score</span><span class="p">:</span> <span class="nb">float</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1690,6 +1648,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalDiseaseResult" class="d
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1697,7 +1656,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalDiseaseResult" class="d
 
 
 <h2 id="src.pheval.post_processing.post_processing.PhEvalGeneResult" class="doc doc-heading">
-        <code>PhEvalGeneResult</code>
+          <code>PhEvalGeneResult</code>
 
   
   <span class="doc doc-labels">
@@ -1708,66 +1667,23 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalGeneResult" class="doc
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
-
-  
-      <p>Minimal data required from tool-specific output for gene prioritisation result</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>gene_symbol</code></td>
-          <td>
-                <code><span title="typing.Union">Union</span>[<span title="typing.List">List</span>[str], str]</code>
-          </td>
-          <td><p>The gene symbol(s) for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>gene_identifier</code></td>
-          <td>
-                <code><span title="typing.Union">Union</span>[<span title="typing.List">List</span>[str], str]</code>
-          </td>
-          <td><p>The ENSEMBL gene identifier(s) for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>score</code></td>
-          <td>
-                <code>float</code>
-          </td>
-          <td><p>The score for the gene result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>While we recommend providing the gene identifier in the ENSEMBL namespace,
-any matching format used in Phenopacket interpretations is acceptable for result matching purposes
-in the analysis.</p>
-</details>
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">30</span>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
+
+  
+      <p>Minimal data required from tool-specific output for gene prioritisation result
+Args:
+    gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry
+    gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry
+    score (float): The score for the gene result entry
+Notes:
+    While we recommend providing the gene identifier in the ENSEMBL namespace,
+    any matching format used in Phenopacket interpretations is acceptable for result matching purposes
+    in the analysis.</p>
+
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">30</span>
 <span class="normal">31</span>
 <span class="normal">32</span>
 <span class="normal">33</span>
@@ -1799,7 +1715,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalGeneResult" class="doc
     <span class="n">gene_identifier</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="nb">str</span><span class="p">]</span>
     <span class="n">score</span><span class="p">:</span> <span class="nb">float</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1819,6 +1735,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalGeneResult" class="doc
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1826,7 +1743,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalGeneResult" class="doc
 
 
 <h2 id="src.pheval.post_processing.post_processing.PhEvalResult" class="doc doc-heading">
-        <code>PhEvalResult</code>
+          <code>PhEvalResult</code>
 
   
   <span class="doc doc-labels">
@@ -1841,19 +1758,19 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalResult" class="doc doc-
   
       <p>Base class for PhEval results.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">25</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">25</span>
 <span class="normal">26</span>
 <span class="normal">27</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@dataclass</span>
 <span class="k">class</span> <span class="nc">PhEvalResult</span><span class="p">:</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;Base class for PhEval results.&quot;&quot;&quot;</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1861,7 +1778,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalResult" class="doc doc-
 
 
 <h2 id="src.pheval.post_processing.post_processing.PhEvalVariantResult" class="doc doc-heading">
-        <code>PhEvalVariantResult</code>
+          <code>PhEvalVariantResult</code>
 
   
   <span class="doc doc-labels">
@@ -1872,96 +1789,28 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalVariantResult" class="d
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
-
-  
-      <p>Minimal data required from tool-specific output for variant prioritisation</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>chromosome</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>The chromosome position of the variant recommended to be provided in the following format.</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>start</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The start position of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>end</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The end position of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>ref</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>The reference allele of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>alt</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>The alternate allele of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>score</code></td>
-          <td>
-                <code>float</code>
-          </td>
-          <td><p>The score for the variant result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>While we recommend providing the variant's chromosome in the specified format,
-any matching format used in Phenopacket interpretations is acceptable for result matching purposes
-in the analysis.</p>
-</details>
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">75</span>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
+
+  
+      <p>Minimal data required from tool-specific output for variant prioritisation
+Args:
+    chromosome (str): The chromosome position of the variant recommended to be provided in the following format.
+    This includes numerical designations from 1 to 22 representing autosomal chromosomes,
+    as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT.
+    start (int): The start position of the variant
+    end (int): The end position of the variant
+    ref (str): The reference allele of the variant
+    alt (str): The alternate allele of the variant
+    score (float): The score for the variant result entry
+Notes:
+    While we recommend providing the variant's chromosome in the specified format,
+    any matching format used in Phenopacket interpretations is acceptable for result matching purposes
+    in the analysis.</p>
+
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">75</span>
 <span class="normal">76</span>
 <span class="normal">77</span>
 <span class="normal">78</span>
@@ -2009,7 +1858,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalVariantResult" class="d
     <span class="n">alt</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">score</span><span class="p">:</span> <span class="nb">float</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2029,6 +1878,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalVariantResult" class="d
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -2036,7 +1886,7 @@ <h2 id="src.pheval.post_processing.post_processing.PhEvalVariantResult" class="d
 
 
 <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult" class="doc doc-heading">
-        <code>RankedPhEvalDiseaseResult</code>
+          <code>RankedPhEvalDiseaseResult</code>
 
   
   <span class="doc doc-labels">
@@ -2047,40 +1897,17 @@ <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult" cl
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalDiseaseResult" href="#src.pheval.post_processing.post_processing.PhEvalDiseaseResult">PhEvalDiseaseResult</a></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalDiseaseResult" href="../../../../developing_a_pheval_plugin/#src.pheval.post_processing.post_processing.PhEvalDiseaseResult">PhEvalDiseaseResult</a></code></p>
 
   
-      <p>PhEval disease result with corresponding rank</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>rank</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The rank for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
+      <p>PhEval disease result with corresponding rank
+Args:
+    rank (int): The rank for the result entry</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">149</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">149</span>
 <span class="normal">150</span>
 <span class="normal">151</span>
 <span class="normal">152</span>
@@ -2130,7 +1957,7 @@ <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult" cl
             <span class="n">rank</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2144,12 +1971,13 @@ <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult" cl
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.from_disease_result" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">from_disease_result</span><span class="p">(</span><span class="n">pheval_disease_result</span><span class="p">,</span> <span class="n">rank</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">from_disease_result</span><span class="p">(</span><span class="n">pheval_disease_result</span><span class="p">,</span> <span class="n">rank</span><span class="p">)</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
@@ -2160,41 +1988,12 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.fro
 
   <div class="doc doc-contents ">
   
-      <p>Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank</p>
+      <p>Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank
+Args:
+    pheval_disease_result (PhEvalDiseaseResult): The disease result entry
+    rank (int): The corresponding rank for the result entry</p>
+
 
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>pheval_disease_result</code></td>
-          <td>
-                <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalDiseaseResult" href="#src.pheval.post_processing.post_processing.PhEvalDiseaseResult">PhEvalDiseaseResult</a></code>
-          </td>
-          <td><p>The disease result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>rank</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The corresponding rank for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
 
   <p><strong>Returns:</strong></p>
   <table>
@@ -2208,14 +2007,18 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.fro
         <tr>
 <td><code>RankedPhEvalDiseaseResult</code></td>          <td>
           </td>
-          <td><p>The result as a RankedPhEvalDiseaseResult</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The result as a RankedPhEvalDiseaseResult</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">158</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">158</span>
 <span class="normal">159</span>
 <span class="normal">160</span>
 <span class="normal">161</span>
@@ -2247,7 +2050,7 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.fro
         <span class="n">rank</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2258,6 +2061,7 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.fro
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -2265,7 +2069,7 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.fro
 
 
 <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult" class="doc doc-heading">
-        <code>RankedPhEvalGeneResult</code>
+          <code>RankedPhEvalGeneResult</code>
 
   
   <span class="doc doc-labels">
@@ -2276,40 +2080,17 @@ <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult" class
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalGeneResult" href="#src.pheval.post_processing.post_processing.PhEvalGeneResult">PhEvalGeneResult</a></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalGeneResult" href="../../../../developing_a_pheval_plugin/#src.pheval.post_processing.post_processing.PhEvalGeneResult">PhEvalGeneResult</a></code></p>
 
   
-      <p>PhEval gene result with corresponding rank</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>rank</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The rank for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
+      <p>PhEval gene result with corresponding rank
+Args:
+    rank (int): The rank for the result entry</p>
 
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">48</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">48</span>
 <span class="normal">49</span>
 <span class="normal">50</span>
 <span class="normal">51</span>
@@ -2359,7 +2140,7 @@ <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult" class
             <span class="n">rank</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2373,12 +2154,13 @@ <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult" class
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_gene_result" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">from_gene_result</span><span class="p">(</span><span class="n">pheval_gene_result</span><span class="p">,</span> <span class="n">rank</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">from_gene_result</span><span class="p">(</span><span class="n">pheval_gene_result</span><span class="p">,</span> <span class="n">rank</span><span class="p">)</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
@@ -2389,41 +2171,12 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_g
 
   <div class="doc doc-contents ">
   
-      <p>Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank</p>
+      <p>Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank
+Args:
+    pheval_gene_result (PhEvalGeneResult): The gene result entry
+    rank (int): The corresponding rank for the result entry</p>
+
 
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>pheval_gene_result</code></td>
-          <td>
-                <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalGeneResult" href="#src.pheval.post_processing.post_processing.PhEvalGeneResult">PhEvalGeneResult</a></code>
-          </td>
-          <td><p>The gene result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>rank</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The corresponding rank for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
 
   <p><strong>Returns:</strong></p>
   <table>
@@ -2437,14 +2190,18 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_g
         <tr>
 <td><code>RankedPhEvalGeneResult</code></td>          <td>
           </td>
-          <td><p>The result as a RankedPhEvalGeneResult</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The result as a RankedPhEvalGeneResult</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">57</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">57</span>
 <span class="normal">58</span>
 <span class="normal">59</span>
 <span class="normal">60</span>
@@ -2476,7 +2233,7 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_g
         <span class="n">rank</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2487,6 +2244,7 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_g
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -2494,7 +2252,7 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_g
 
 
 <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult" class="doc doc-heading">
-        <code>RankedPhEvalVariantResult</code>
+          <code>RankedPhEvalVariantResult</code>
 
   
   <span class="doc doc-labels">
@@ -2505,40 +2263,17 @@ <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult" cl
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalVariantResult" href="#src.pheval.post_processing.post_processing.PhEvalVariantResult">PhEvalVariantResult</a></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalVariantResult" href="../../../../developing_a_pheval_plugin/#src.pheval.post_processing.post_processing.PhEvalVariantResult">PhEvalVariantResult</a></code></p>
 
   
-      <p>PhEval variant result with corresponding rank</p>
+      <p>PhEval variant result with corresponding rank
+Args:
+    rank (int): The rank for the result entry</p>
 
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>rank</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The rank for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">101</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">101</span>
 <span class="normal">102</span>
 <span class="normal">103</span>
 <span class="normal">104</span>
@@ -2594,7 +2329,7 @@ <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult" cl
             <span class="n">rank</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2608,12 +2343,13 @@ <h2 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult" cl
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.from_variant_result" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">from_variant_result</span><span class="p">(</span><span class="n">pheval_variant_result</span><span class="p">,</span> <span class="n">rank</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">from_variant_result</span><span class="p">(</span><span class="n">pheval_variant_result</span><span class="p">,</span> <span class="n">rank</span><span class="p">)</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
@@ -2624,41 +2360,12 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.fro
 
   <div class="doc doc-contents ">
   
-      <p>Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank</p>
+      <p>Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank
+Args:
+    pheval_variant_result (PhEvalVariantResult): The variant result entry
+    rank (int): The corresponding rank for the result entry</p>
+
 
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>pheval_variant_result</code></td>
-          <td>
-                <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalVariantResult" href="#src.pheval.post_processing.post_processing.PhEvalVariantResult">PhEvalVariantResult</a></code>
-          </td>
-          <td><p>The variant result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>rank</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The corresponding rank for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
 
   <p><strong>Returns:</strong></p>
   <table>
@@ -2672,14 +2379,18 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.fro
         <tr>
 <td><code>RankedPhEvalVariantResult</code></td>          <td>
           </td>
-          <td><p>The result as a RankedPhEvalVariantResult</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The result as a RankedPhEvalVariantResult</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">110</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">110</span>
 <span class="normal">111</span>
 <span class="normal">112</span>
 <span class="normal">113</span>
@@ -2717,7 +2428,7 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.fro
         <span class="n">rank</span><span class="o">=</span><span class="n">rank</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2728,6 +2439,7 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.fro
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -2735,7 +2447,7 @@ <h3 id="src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.fro
 
 
 <h2 id="src.pheval.post_processing.post_processing.ResultSorter" class="doc doc-heading">
-        <code>ResultSorter</code>
+          <code>ResultSorter</code>
 
 
 </h2>
@@ -2746,10 +2458,9 @@ <h2 id="src.pheval.post_processing.post_processing.ResultSorter" class="doc doc-
   
       <p>Class for sorting PhEvalResult instances based on a given sort order.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">185</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">185</span>
 <span class="normal">186</span>
 <span class="normal">187</span>
 <span class="normal">188</span>
@@ -2837,7 +2548,7 @@ <h2 id="src.pheval.post_processing.post_processing.ResultSorter" class="doc doc-
             <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sort_by_decreasing_score</span><span class="p">()</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2851,12 +2562,13 @@ <h2 id="src.pheval.post_processing.post_processing.ResultSorter" class="doc doc-
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.post_processing.post_processing.ResultSorter.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">pheval_results</span><span class="p">,</span> <span class="n">sort_order</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">pheval_results</span><span class="p">,</span> <span class="n">sort_order</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2865,6 +2577,8 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.__init__" class=
   
       <p>Initialise ResultSorter</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2881,7 +2595,11 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.__init__" class=
           <td>
                 <code>[<a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a>]</code>
           </td>
-          <td><p>List of PhEvalResult instances to be sorted</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of PhEvalResult instances to be sorted</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2891,7 +2609,11 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.__init__" class=
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.SortOrder" href="#src.pheval.post_processing.post_processing.SortOrder">SortOrder</a></code>
           </td>
-          <td><p>Sorting order to be applied</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Sorting order to be applied</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2899,9 +2621,9 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.__init__" class=
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">188</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">188</span>
 <span class="normal">189</span>
 <span class="normal">190</span>
 <span class="normal">191</span>
@@ -2921,17 +2643,18 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.__init__" class=
     <span class="bp">self</span><span class="o">.</span><span class="n">pheval_results</span> <span class="o">=</span> <span class="n">pheval_results</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">sort_order</span> <span class="o">=</span> <span class="n">sort_order</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_results" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">sort_pheval_results</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">sort_pheval_results</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2940,6 +2663,8 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_resu
   
       <p>Sort results based on the specified sort order.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2953,14 +2678,18 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_resu
           <td>
                 <code>[<a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a>]</code>
           </td>
-          <td><p>[PhEvalResult]: Sorted list of PhEvalResult instances.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>[PhEvalResult]: Sorted list of PhEvalResult instances.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">217</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">217</span>
 <span class="normal">218</span>
 <span class="normal">219</span>
 <span class="normal">220</span>
@@ -2984,7 +2713,7 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_resu
         <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">_sort_by_decreasing_score</span><span class="p">()</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2995,6 +2724,7 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_resu
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -3002,23 +2732,22 @@ <h3 id="src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_resu
 
 
 <h2 id="src.pheval.post_processing.post_processing.SortOrder" class="doc doc-heading">
-        <code>SortOrder</code>
+          <code>SortOrder</code>
 
 
 </h2>
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><span title="enum.Enum">Enum</span></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><span title="enum.Enum">Enum</span></code></p>
 
   
       <p>Enumeration representing sorting orders.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">176</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">176</span>
 <span class="normal">177</span>
 <span class="normal">178</span>
 <span class="normal">179</span>
@@ -3032,7 +2761,7 @@ <h2 id="src.pheval.post_processing.post_processing.SortOrder" class="doc doc-hea
     <span class="n">DESCENDING</span> <span class="o">=</span> <span class="mi">2</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;Descending sort order.&quot;&quot;&quot;</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -3049,10 +2778,11 @@ <h2 id="src.pheval.post_processing.post_processing.SortOrder" class="doc doc-hea
 
 
 <h3 id="src.pheval.post_processing.post_processing.SortOrder.ASCENDING" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">ASCENDING</span> <span class="o">=</span> <span class="mi">1</span></code>
+          <code class="highlight language-python"><span class="n">ASCENDING</span> <span class="o">=</span> <span class="mi">1</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-class-attribute"><code>class-attribute</code></small>
+      <small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
   </span>
 
 </h3>
@@ -3070,10 +2800,11 @@ <h3 id="src.pheval.post_processing.post_processing.SortOrder.ASCENDING" class="d
 
 
 <h3 id="src.pheval.post_processing.post_processing.SortOrder.DESCENDING" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">DESCENDING</span> <span class="o">=</span> <span class="mi">2</span></code>
+          <code class="highlight language-python"><span class="n">DESCENDING</span> <span class="o">=</span> <span class="mi">2</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-class-attribute"><code>class-attribute</code></small>
+      <small class="doc doc-label doc-label-instance-attribute"><code>instance-attribute</code></small>
   </span>
 
 </h3>
@@ -3094,56 +2825,29 @@ <h3 id="src.pheval.post_processing.post_processing.SortOrder.DESCENDING" class="
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.post_processing.post_processing.calculate_end_pos" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">calculate_end_pos</span><span class="p">(</span><span class="n">variant_start</span><span class="p">,</span> <span class="n">variant_ref</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">calculate_end_pos</span><span class="p">(</span><span class="n">variant_start</span><span class="p">,</span> <span class="n">variant_ref</span><span class="p">)</span></code>
 
 </h2>
 
 
   <div class="doc doc-contents ">
   
-      <p>Calculate the end position for a variant</p>
+      <p>Calculate the end position for a variant
+Args:
+    variant_start (int): The start position of the variant
+    variant_ref (str): The reference allele of the variant</p>
+
 
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>variant_start</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The start position of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>variant_ref</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>The reference allele of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
 
   <p><strong>Returns:</strong></p>
   <table>
@@ -3158,14 +2862,18 @@ <h2 id="src.pheval.post_processing.post_processing.calculate_end_pos" class="doc
 <td><code>int</code></td>          <td>
                 <code>int</code>
           </td>
-          <td><p>The end position of the variant</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The end position of the variant</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">13</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">13</span>
 <span class="normal">14</span>
 <span class="normal">15</span>
 <span class="normal">16</span>
@@ -3185,17 +2893,18 @@ <h2 id="src.pheval.post_processing.post_processing.calculate_end_pos" class="doc
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="n">variant_start</span> <span class="o">+</span> <span class="nb">len</span><span class="p">(</span><span class="n">variant_ref</span><span class="p">)</span> <span class="o">-</span> <span class="mi">1</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.post_processing.post_processing.generate_pheval_result" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_pheval_result</span><span class="p">(</span><span class="n">pheval_result</span><span class="p">,</span> <span class="n">sort_order_str</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">,</span> <span class="n">tool_result_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_pheval_result</span><span class="p">(</span><span class="n">pheval_result</span><span class="p">,</span> <span class="n">sort_order_str</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">,</span> <span class="n">tool_result_path</span><span class="p">)</span></code>
 
 </h2>
 
@@ -3204,6 +2913,8 @@ <h2 id="src.pheval.post_processing.post_processing.generate_pheval_result" class
   
       <p>Generate PhEval variant, gene or disease TSV result based on input results.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3220,7 +2931,11 @@ <h2 id="src.pheval.post_processing.post_processing.generate_pheval_result" class
           <td>
                 <code>[<a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a>]</code>
           </td>
-          <td><p>List of PhEvalResult instances to be processed.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of PhEvalResult instances to be processed.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3230,7 +2945,11 @@ <h2 id="src.pheval.post_processing.post_processing.generate_pheval_result" class
           <td>
                 <code>str</code>
           </td>
-          <td><p>String representation of the desired sorting order.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>String representation of the desired sorting order.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3240,7 +2959,11 @@ <h2 id="src.pheval.post_processing.post_processing.generate_pheval_result" class
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the output directory.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the output directory.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3250,7 +2973,11 @@ <h2 id="src.pheval.post_processing.post_processing.generate_pheval_result" class
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the tool-specific result file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the tool-specific result file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3258,6 +2985,8 @@ <h2 id="src.pheval.post_processing.post_processing.generate_pheval_result" class
     </tbody>
   </table>
 
+
+
   <p><strong>Raises:</strong></p>
   <table>
     <thead>
@@ -3271,14 +3000,18 @@ <h2 id="src.pheval.post_processing.post_processing.generate_pheval_result" class
           <td>
                 <code>ValueError</code>
           </td>
-          <td><p>If the results are not all the same type or an error occurs during file writing.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If the results are not all the same type or an error occurs during file writing.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">357</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">357</span>
 <span class="normal">358</span>
 <span class="normal">359</span>
 <span class="normal">360</span>
@@ -3338,7 +3071,7 @@ <h2 id="src.pheval.post_processing.post_processing.generate_pheval_result" class
     <span class="k">else</span><span class="p">:</span>
         <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;Results are not all of the same type.&quot;</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/prepare/create_noisy_phenopackets/index.html b/api/pheval/prepare/create_noisy_phenopackets/index.html
index 66e81f3ad..f18adcf1a 100644
--- a/api/pheval/prepare/create_noisy_phenopackets/index.html
+++ b/api/pheval/prepare/create_noisy_phenopackets/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/prepare/create_noisy_phenopackets/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -828,7 +828,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets" class="md-nav__link">
-    src.pheval.prepare.create_noisy_phenopackets
+    create_noisy_phenopackets
   </a>
   
 </li>
@@ -843,49 +843,49 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patient_terms_to_parent" class="md-nav__link">
-    convert_patient_terms_to_parent()
+    convert_patient_terms_to_parent
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random_hpo_terms" class="md-nav__link">
-    create_random_hpo_terms()
+    create_random_hpo_terms
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo_terms" class="md-nav__link">
-    randomise_hpo_terms()
+    randomise_hpo_terms
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_patient_terms" class="md-nav__link">
-    retain_real_patient_terms()
+    retain_real_patient_terms
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_term" class="md-nav__link">
-    retrieve_hpo_term()
+    retrieve_hpo_term
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_factor_proportions" class="md-nav__link">
-    scramble_factor_proportions()
+    scramble_factor_proportions
   </a>
   
 </li>
@@ -897,35 +897,35 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_profile" class="md-nav__link">
-    add_noise_to_phenotypic_profile()
+    add_noise_to_phenotypic_profile
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacket" class="md-nav__link">
-    create_scrambled_phenopacket()
+    create_scrambled_phenopacket
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopackets" class="md-nav__link">
-    create_scrambled_phenopackets()
+    create_scrambled_phenopackets
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.load_ontology" class="md-nav__link">
-    load_ontology()
+    load_ontology
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" class="md-nav__link">
-    scramble_phenopackets()
+    scramble_phenopackets
   </a>
   
 </li>
@@ -1304,7 +1304,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets" class="md-nav__link">
-    src.pheval.prepare.create_noisy_phenopackets
+    create_noisy_phenopackets
   </a>
   
 </li>
@@ -1319,49 +1319,49 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patient_terms_to_parent" class="md-nav__link">
-    convert_patient_terms_to_parent()
+    convert_patient_terms_to_parent
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random_hpo_terms" class="md-nav__link">
-    create_random_hpo_terms()
+    create_random_hpo_terms
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo_terms" class="md-nav__link">
-    randomise_hpo_terms()
+    randomise_hpo_terms
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_patient_terms" class="md-nav__link">
-    retain_real_patient_terms()
+    retain_real_patient_terms
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_term" class="md-nav__link">
-    retrieve_hpo_term()
+    retrieve_hpo_term
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_factor_proportions" class="md-nav__link">
-    scramble_factor_proportions()
+    scramble_factor_proportions
   </a>
   
 </li>
@@ -1373,35 +1373,35 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_profile" class="md-nav__link">
-    add_noise_to_phenotypic_profile()
+    add_noise_to_phenotypic_profile
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacket" class="md-nav__link">
-    create_scrambled_phenopacket()
+    create_scrambled_phenopacket
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopackets" class="md-nav__link">
-    create_scrambled_phenopackets()
+    create_scrambled_phenopackets
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.load_ontology" class="md-nav__link">
-    load_ontology()
+    load_ontology
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" class="md-nav__link">
-    scramble_phenopackets()
+    scramble_phenopackets
   </a>
   
 </li>
@@ -1431,6 +1431,7 @@ <h1>Create noisy phenopackets</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.prepare.create_noisy_phenopackets"></a>
   <div class="doc doc-contents first">
 
@@ -1450,7 +1451,7 @@ <h1>Create noisy phenopackets</h1>
 
 
 <h2 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser" class="doc doc-heading">
-        <code>HpoRandomiser</code>
+          <code>HpoRandomiser</code>
 
 
 </h2>
@@ -1461,10 +1462,9 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser" class="doc d
   
       <p>Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO).</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 29</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 29</span>
 <span class="normal"> 30</span>
 <span class="normal"> 31</span>
 <span class="normal"> 32</span>
@@ -1772,7 +1772,7 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser" class="doc d
             <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">create_random_hpo_terms</span><span class="p">(</span><span class="n">number_of_scrambled_terms</span><span class="p">)</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1786,12 +1786,13 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser" class="doc d
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">hpo_ontology</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">hpo_ontology</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1800,6 +1801,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__" cla
   
       <p>Initialise the HpoRandomiser.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1816,7 +1819,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__" cla
           <td>
                 <code><span title="oaklib.implementations.pronto.pronto_implementation.ProntoImplementation">ProntoImplementation</span></code>
           </td>
-          <td><p>The instance of the HPO ontology.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The instance of the HPO ontology.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1826,7 +1833,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__" cla
           <td>
                 <code>float</code>
           </td>
-          <td><p>A factor for scrambling phenotypic features.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A factor for scrambling phenotypic features.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1834,9 +1845,9 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__" cla
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">32</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">32</span>
 <span class="normal">33</span>
 <span class="normal">34</span>
 <span class="normal">35</span>
@@ -1858,17 +1869,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__" cla
     <span class="bp">self</span><span class="o">.</span><span class="n">phenotypic_abnormalities</span> <span class="o">=</span> <span class="nb">set</span><span class="p">(</span><span class="n">hpo_ontology</span><span class="o">.</span><span class="n">roots</span><span class="p">(</span><span class="n">predicates</span><span class="o">=</span><span class="p">[</span><span class="s2">&quot;HP:0000118&quot;</span><span class="p">]))</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">scramble_factor</span> <span class="o">=</span> <span class="n">scramble_factor</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patient_terms_to_parent" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">convert_patient_terms_to_parent</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">,</span> <span class="n">retained_phenotypic_features</span><span class="p">,</span> <span class="n">number_of_scrambled_terms</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">convert_patient_terms_to_parent</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">,</span> <span class="n">retained_phenotypic_features</span><span class="p">,</span> <span class="n">number_of_scrambled_terms</span><span class="p">)</span></code>
 
 </h3>
 
@@ -1877,6 +1889,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patie
   
       <p>Convert a subset of patient HPO terms to their respective parent terms.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1893,7 +1907,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patie
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List of all phenotypic features.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of all phenotypic features.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1903,7 +1921,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patie
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List of retained non-scrambled phenotypic features.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of retained non-scrambled phenotypic features.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1913,7 +1935,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patie
           <td>
                 <code>int</code>
           </td>
-          <td><p>The count of scrambled HPO terms.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The count of scrambled HPO terms.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1921,6 +1947,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patie
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1934,21 +1962,25 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patie
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List[PhenotypicFeature]: A list of HPO terms converted to their parent terms.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[PhenotypicFeature]: A list of HPO terms converted to their parent terms.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-<details class="note">
+<details class="note" open>
   <summary>Note</summary>
   <p>This method identifies a subset of patient HPO terms that are not retained among the
 non-scrambled phenotypic features and converts them to their respective parent terms.
 It then returns a list of parent HPO terms based on the provided scrambled terms count.
 If no remaining HPO terms are available for conversion, no parent terms are returned.</p>
 </details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 94</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 94</span>
 <span class="normal"> 95</span>
 <span class="normal"> 96</span>
 <span class="normal"> 97</span>
@@ -2028,17 +2060,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patie
             <span class="n">parent_terms</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">retrieve_hpo_term</span><span class="p">(</span><span class="n">random</span><span class="o">.</span><span class="n">choice</span><span class="p">(</span><span class="n">parents</span><span class="p">)))</span>
     <span class="k">return</span> <span class="n">parent_terms</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random_hpo_terms" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_random_hpo_terms</span><span class="p">(</span><span class="n">number_of_scrambled_terms</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">create_random_hpo_terms</span><span class="p">(</span><span class="n">number_of_scrambled_terms</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2047,6 +2080,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random
   
       <p>Generate a list of random HPO terms.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2063,7 +2098,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random
           <td>
                 <code>int</code>
           </td>
-          <td><p>The count of random HPO terms to be generated.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The count of random HPO terms to be generated.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2071,6 +2110,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2084,14 +2125,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List[PhenotypicFeature]: A list of randomly selected HPO terms.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[PhenotypicFeature]: A list of randomly selected HPO terms.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">135</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">135</span>
 <span class="normal">136</span>
 <span class="normal">137</span>
 <span class="normal">138</span>
@@ -2119,17 +2164,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random
     <span class="p">)</span>
     <span class="k">return</span> <span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">retrieve_hpo_term</span><span class="p">(</span><span class="n">random_id</span><span class="p">)</span> <span class="k">for</span> <span class="n">random_id</span> <span class="ow">in</span> <span class="n">random_ids</span><span class="p">]</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo_terms" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">randomise_hpo_terms</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">randomise_hpo_terms</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2138,6 +2184,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo
   
       <p>Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2154,7 +2202,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List of phenotypic features to be randomised.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of phenotypic features to be randomised.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2162,6 +2214,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2175,12 +2229,16 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List[PhenotypicFeature]: A list of randomised HPO terms.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[PhenotypicFeature]: A list of randomised HPO terms.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-<details class="note">
+<details class="note" open>
   <summary>Note</summary>
   <p>This method randomises the provided phenotypic features by incorporating three types of HPO terms:
 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor.
@@ -2189,9 +2247,9 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo
 <p>The method determines the count of terms for each category and combines them to form a final list
 of randomised HPO terms to be used in the phenotypic features.</p>
 </details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">150</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">150</span>
 <span class="normal">151</span>
 <span class="normal">152</span>
 <span class="normal">153</span>
@@ -2257,17 +2315,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo
         <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">create_random_hpo_terms</span><span class="p">(</span><span class="n">number_of_scrambled_terms</span><span class="p">)</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_patient_terms" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">retain_real_patient_terms</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">,</span> <span class="n">number_of_scrambled_terms</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">retain_real_patient_terms</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">,</span> <span class="n">number_of_scrambled_terms</span><span class="p">)</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
@@ -2280,6 +2339,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_p
   
       <p>Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2296,7 +2357,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_p
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List of phenotypic features.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of phenotypic features.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2306,7 +2371,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_p
           <td>
                 <code>int</code>
           </td>
-          <td><p>The count of scrambled HPO terms.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The count of scrambled HPO terms.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2314,6 +2383,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_p
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2327,14 +2398,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_p
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">73</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">73</span>
 <span class="normal">74</span>
 <span class="normal">75</span>
 <span class="normal">76</span>
@@ -2374,17 +2449,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_p
         <span class="n">number_of_real_id</span> <span class="o">=</span> <span class="mi">1</span>
     <span class="k">return</span> <span class="n">random</span><span class="o">.</span><span class="n">sample</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">,</span> <span class="n">number_of_real_id</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_term" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">retrieve_hpo_term</span><span class="p">(</span><span class="n">hpo_id</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">retrieve_hpo_term</span><span class="p">(</span><span class="n">hpo_id</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2393,6 +2469,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_
   
       <p>Retrieve an HPO term based on the provided HPO ID.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2409,7 +2487,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_
           <td>
                 <code>str</code>
           </td>
-          <td><p>The HPO ID of the term to retrieve.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The HPO ID of the term to retrieve.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2417,6 +2499,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2430,14 +2514,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_
 <td><code>PhenotypicFeature</code></td>          <td>
                 <code><span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span></code>
           </td>
-          <td><p>The PhenotypicFeature object representing the retrieved HPO term.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The PhenotypicFeature object representing the retrieved HPO term.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">59</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">59</span>
 <span class="normal">60</span>
 <span class="normal">61</span>
 <span class="normal">62</span>
@@ -2463,17 +2551,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_
     <span class="n">hpo_term</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">rels</span><span class="p">[(</span><span class="nb">list</span><span class="p">(</span><span class="n">rels</span><span class="o">.</span><span class="n">keys</span><span class="p">())[</span><span class="mi">0</span><span class="p">])])</span>
     <span class="k">return</span> <span class="n">PhenotypicFeature</span><span class="p">(</span><span class="nb">type</span><span class="o">=</span><span class="n">OntologyClass</span><span class="p">(</span><span class="nb">id</span><span class="o">=</span><span class="n">hpo_id</span><span class="p">,</span> <span class="n">label</span><span class="o">=</span><span class="n">hpo_term</span><span class="p">))</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_factor_proportions" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">scramble_factor_proportions</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">scramble_factor_proportions</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2482,6 +2571,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_fact
   
       <p>Calculate the proportion of scrambled HPO terms based on the scramble factor.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2498,7 +2589,11 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_fact
           <td>
                 <code>list[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List of phenotypic features.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of phenotypic features.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2506,6 +2601,8 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_fact
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2519,14 +2616,18 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_fact
 <td><code>int</code></td>          <td>
                 <code>int</code>
           </td>
-          <td><p>The calculated number of phenotypic features to be scrambled.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The calculated number of phenotypic features to be scrambled.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">44</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">44</span>
 <span class="normal">45</span>
 <span class="normal">46</span>
 <span class="normal">47</span>
@@ -2554,7 +2655,7 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_fact
     <span class="k">else</span><span class="p">:</span>
         <span class="k">return</span> <span class="nb">int</span><span class="p">(</span><span class="nb">round</span><span class="p">(</span><span class="nb">len</span><span class="p">(</span><span class="n">phenotypic_features</span><span class="p">)</span> <span class="o">*</span> <span class="bp">self</span><span class="o">.</span><span class="n">scramble_factor</span><span class="p">,</span> <span class="mi">0</span><span class="p">))</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2565,15 +2666,17 @@ <h3 id="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_fact
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_profile" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">add_noise_to_phenotypic_profile</span><span class="p">(</span><span class="n">hpo_randomiser</span><span class="p">,</span> <span class="n">phenopacket</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">add_noise_to_phenotypic_profile</span><span class="p">(</span><span class="n">hpo_randomiser</span><span class="p">,</span> <span class="n">phenopacket</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2582,6 +2685,8 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_pro
   
       <p>Randomise the phenotypic profile of a Phenopacket or Family.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2598,7 +2703,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_pro
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser" href="#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser">HpoRandomiser</a></code>
           </td>
-          <td><p>An instance of HpoRandomiser used for randomisation.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>An instance of HpoRandomiser used for randomisation.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2608,7 +2717,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_pro
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>The Phenopacket or Family to be randomised.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The Phenopacket or Family to be randomised.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2616,6 +2729,8 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_pro
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2629,14 +2744,18 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_pro
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>Union[Phenopacket, Family]: The randomised Phenopacket or Family.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Union[Phenopacket, Family]: The randomised Phenopacket or Family.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">185</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">185</span>
 <span class="normal">186</span>
 <span class="normal">187</span>
 <span class="normal">188</span>
@@ -2672,17 +2791,18 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_pro
     <span class="n">randomised_phenopacket</span> <span class="o">=</span> <span class="n">PhenopacketRebuilder</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">)</span><span class="o">.</span><span class="n">add_randomised_hpo</span><span class="p">(</span><span class="n">random_phenotypes</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">randomised_phenopacket</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacket" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_scrambled_phenopacket</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">create_scrambled_phenopacket</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2691,6 +2811,8 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
   
       <p>Create a scrambled version of a Phenopacket.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2707,7 +2829,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory to store the output scrambled Phenopacket.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory to store the output scrambled Phenopacket.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2717,7 +2843,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the original Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the original Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2727,7 +2857,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
           <td>
                 <code>float</code>
           </td>
-          <td><p>A factor determining the level of scrambling for phenotypic features.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A factor determining the level of scrambling for phenotypic features.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2735,9 +2869,9 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">205</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">205</span>
 <span class="normal">206</span>
 <span class="normal">207</span>
 <span class="normal">208</span>
@@ -2781,17 +2915,18 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
         <span class="n">output_dir</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="o">.</span><span class="n">name</span><span class="p">),</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopackets" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_scrambled_phenopackets</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">create_scrambled_phenopackets</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2800,6 +2935,8 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
   
       <p>Create scrambled versions of Phenopackets within a directory.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2816,7 +2953,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory to store the output scrambled Phenopackets.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory to store the output scrambled Phenopackets.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2826,7 +2967,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the original Phenopacket files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the original Phenopacket files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2836,7 +2981,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
           <td>
                 <code>float</code>
           </td>
-          <td><p>A factor determining the level of scrambling for phenotypic features.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A factor determining the level of scrambling for phenotypic features.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2844,9 +2993,9 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">229</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">229</span>
 <span class="normal">230</span>
 <span class="normal">231</span>
 <span class="normal">232</span>
@@ -2892,17 +3041,18 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacke
             <span class="p">),</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_noisy_phenopackets.load_ontology" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">load_ontology</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">load_ontology</span><span class="p">()</span></code>
 
 </h2>
 
@@ -2911,6 +3061,8 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.load_ontology" class="doc d
   
       <p>Load the Human Phenotype Ontology (HPO).</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2923,14 +3075,18 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.load_ontology" class="doc d
         <tr>
 <td><code>ProntoImplementation</code></td>          <td>
           </td>
-          <td><p>An instance of ProntoImplementation containing the loaded HPO.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>An instance of ProntoImplementation containing the loaded HPO.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">18</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">18</span>
 <span class="normal">19</span>
 <span class="normal">20</span>
 <span class="normal">21</span>
@@ -2948,17 +3104,18 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.load_ontology" class="doc d
     <span class="n">resource</span> <span class="o">=</span> <span class="n">OntologyResource</span><span class="p">(</span><span class="n">slug</span><span class="o">=</span><span class="s2">&quot;hp.obo&quot;</span><span class="p">,</span> <span class="n">local</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">ProntoImplementation</span><span class="p">(</span><span class="n">resource</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">scramble_phenopackets</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">scramble_phenopackets</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2967,6 +3124,8 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" clas
   
       <p>Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2983,7 +3142,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" clas
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory to store the output scrambled Phenopackets.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory to store the output scrambled Phenopackets.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2993,7 +3156,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" clas
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to a single Phenopacket file (if applicable).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to a single Phenopacket file (if applicable).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3003,7 +3170,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" clas
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing multiple Phenopacket files (if applicable).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing multiple Phenopacket files (if applicable).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3013,7 +3184,11 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" clas
           <td>
                 <code>float</code>
           </td>
-          <td><p>A factor determining the level of scrambling for phenotypic features.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A factor determining the level of scrambling for phenotypic features.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3021,9 +3196,9 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" clas
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">254</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_noisy_phenopackets.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">254</span>
 <span class="normal">255</span>
 <span class="normal">256</span>
 <span class="normal">257</span>
@@ -3057,7 +3232,7 @@ <h2 id="src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets" clas
     <span class="k">elif</span> <span class="n">phenopacket_dir</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
         <span class="n">create_scrambled_phenopackets</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/prepare/create_spiked_vcf/index.html b/api/pheval/prepare/create_spiked_vcf/index.html
index 42eb17389..d6c9530b5 100644
--- a/api/pheval/prepare/create_spiked_vcf/index.html
+++ b/api/pheval/prepare/create_spiked_vcf/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/prepare/create_spiked_vcf/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -842,7 +842,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf" class="md-nav__link">
-    src.pheval.prepare.create_spiked_vcf
+    create_spiked_vcf
   </a>
   
 </li>
@@ -857,7 +857,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="md-nav__link">
-    populate_fields()
+    populate_fields
   </a>
   
 </li>
@@ -884,28 +884,28 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_assembly" class="md-nav__link">
-    parse_assembly()
+    parse_assembly
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_sample_id" class="md-nav__link">
-    parse_sample_id()
+    parse_sample_id
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header" class="md-nav__link">
-    parse_vcf_header()
+    parse_vcf_header
   </a>
   
 </li>
@@ -925,35 +925,35 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header" class="md-nav__link">
-    construct_header()
+    construct_header
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry" class="md-nav__link">
-    construct_variant_entry()
+    construct_variant_entry
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf" class="md-nav__link">
-    construct_vcf()
+    construct_vcf
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" class="md-nav__link">
-    construct_vcf_records()
+    construct_vcf_records
   </a>
   
 </li>
@@ -973,28 +973,28 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_gzip" class="md-nav__link">
-    write_gzip()
+    write_gzip
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_uncompressed" class="md-nav__link">
-    write_uncompressed()
+    write_uncompressed
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_vcf_file" class="md-nav__link">
-    write_vcf_file()
+    write_vcf_file
   </a>
   
 </li>
@@ -1006,63 +1006,63 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="md-nav__link">
-    check_variant_assembly()
+    check_variant_assembly
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="md-nav__link">
-    create_spiked_vcf()
+    create_spiked_vcf
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="md-nav__link">
-    create_spiked_vcfs()
+    create_spiked_vcfs
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="md-nav__link">
-    generate_spiked_vcf_file()
+    generate_spiked_vcf_file
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.read_vcf" class="md-nav__link">
-    read_vcf()
+    read_vcf
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="md-nav__link">
-    select_vcf_template()
+    select_vcf_template
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class="md-nav__link">
-    spike_and_update_phenopacket()
+    spike_and_update_phenopacket
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="md-nav__link">
-    spike_vcf_contents()
+    spike_vcf_contents
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="md-nav__link">
-    spike_vcfs()
+    spike_vcfs
   </a>
   
 </li>
@@ -1427,7 +1427,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf" class="md-nav__link">
-    src.pheval.prepare.create_spiked_vcf
+    create_spiked_vcf
   </a>
   
 </li>
@@ -1442,7 +1442,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="md-nav__link">
-    populate_fields()
+    populate_fields
   </a>
   
 </li>
@@ -1469,28 +1469,28 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_assembly" class="md-nav__link">
-    parse_assembly()
+    parse_assembly
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_sample_id" class="md-nav__link">
-    parse_sample_id()
+    parse_sample_id
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header" class="md-nav__link">
-    parse_vcf_header()
+    parse_vcf_header
   </a>
   
 </li>
@@ -1510,35 +1510,35 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header" class="md-nav__link">
-    construct_header()
+    construct_header
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry" class="md-nav__link">
-    construct_variant_entry()
+    construct_variant_entry
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf" class="md-nav__link">
-    construct_vcf()
+    construct_vcf
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" class="md-nav__link">
-    construct_vcf_records()
+    construct_vcf_records
   </a>
   
 </li>
@@ -1558,28 +1558,28 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_gzip" class="md-nav__link">
-    write_gzip()
+    write_gzip
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_uncompressed" class="md-nav__link">
-    write_uncompressed()
+    write_uncompressed
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_vcf_file" class="md-nav__link">
-    write_vcf_file()
+    write_vcf_file
   </a>
   
 </li>
@@ -1591,63 +1591,63 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="md-nav__link">
-    check_variant_assembly()
+    check_variant_assembly
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="md-nav__link">
-    create_spiked_vcf()
+    create_spiked_vcf
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="md-nav__link">
-    create_spiked_vcfs()
+    create_spiked_vcfs
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="md-nav__link">
-    generate_spiked_vcf_file()
+    generate_spiked_vcf_file
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.read_vcf" class="md-nav__link">
-    read_vcf()
+    read_vcf
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="md-nav__link">
-    select_vcf_template()
+    select_vcf_template
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class="md-nav__link">
-    spike_and_update_phenopacket()
+    spike_and_update_phenopacket
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="md-nav__link">
-    spike_vcf_contents()
+    spike_vcf_contents
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="md-nav__link">
-    spike_vcfs()
+    spike_vcfs
   </a>
   
 </li>
@@ -1677,6 +1677,7 @@ <h1>Create spiked vcf</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.prepare.create_spiked_vcf"></a>
   <div class="doc doc-contents first">
 
@@ -1696,7 +1697,7 @@ <h1>Create spiked vcf</h1>
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.VcfFile" class="doc doc-heading">
-        <code>VcfFile</code>
+          <code>VcfFile</code>
 
   
   <span class="doc doc-labels">
@@ -1711,6 +1712,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfFile" class="doc doc-heading">
   
       <p>Represents a VCF file with its name, contents, and header information.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -1722,33 +1725,44 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfFile" class="doc doc-heading">
     </thead>
     <tbody>
         <tr>
-          <td><code>vcf_file_name</code></td>
+          <td><code><span title="src.pheval.prepare.create_spiked_vcf.VcfFile.vcf_file_name">vcf_file_name</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>The name of the VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The name of the VCF file.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>vcf_contents</code></td>
+          <td><code><span title="src.pheval.prepare.create_spiked_vcf.VcfFile.vcf_contents">vcf_contents</span></code></td>
           <td>
                 <code><span title="typing.List">List</span>[str]</code>
           </td>
-          <td><p>The contents of the VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The contents of the VCF file.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>vcf_header</code></td>
+          <td><code><span title="src.pheval.prepare.create_spiked_vcf.VcfFile.vcf_header">vcf_header</span></code></td>
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfHeader" href="#src.pheval.prepare.create_spiked_vcf.VcfHeader">VcfHeader</a></code>
           </td>
-          <td><p>The parsed header information of the VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The parsed header information of the VCF file.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">175</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">175</span>
 <span class="normal">176</span>
 <span class="normal">177</span>
 <span class="normal">178</span>
@@ -1806,7 +1820,7 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfFile" class="doc doc-heading">
         <span class="n">contents</span> <span class="o">=</span> <span class="n">read_vcf</span><span class="p">(</span><span class="n">template_vcf</span><span class="p">)</span>
         <span class="k">return</span> <span class="n">VcfFile</span><span class="p">(</span><span class="n">template_vcf</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">contents</span><span class="p">,</span> <span class="n">VcfHeaderParser</span><span class="p">(</span><span class="n">contents</span><span class="p">)</span><span class="o">.</span><span class="n">parse_vcf_header</span><span class="p">())</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1820,12 +1834,13 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfFile" class="doc doc-heading">
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">populate_fields</span><span class="p">(</span><span class="n">template_vcf</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">populate_fields</span><span class="p">(</span><span class="n">template_vcf</span><span class="p">)</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-staticmethod"><code>staticmethod</code></small>
@@ -1838,6 +1853,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="doc
   
       <p>Populate the fields of the VcfFile instance using the contents of a template VCF file.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1854,7 +1871,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="doc
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the template VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the template VCF file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1862,6 +1883,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="doc
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1874,14 +1897,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="doc
         <tr>
 <td><code>VcfFile</code></td>          <td>
           </td>
-          <td><p>An instance of VcfFile with populated fields.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>An instance of VcfFile with populated fields.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">190</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">190</span>
 <span class="normal">191</span>
 <span class="normal">192</span>
 <span class="normal">193</span>
@@ -1909,7 +1936,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="doc
     <span class="n">contents</span> <span class="o">=</span> <span class="n">read_vcf</span><span class="p">(</span><span class="n">template_vcf</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">VcfFile</span><span class="p">(</span><span class="n">template_vcf</span><span class="o">.</span><span class="n">name</span><span class="p">,</span> <span class="n">contents</span><span class="p">,</span> <span class="n">VcfHeaderParser</span><span class="p">(</span><span class="n">contents</span><span class="p">)</span><span class="o">.</span><span class="n">parse_vcf_header</span><span class="p">())</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -1920,6 +1947,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="doc
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1927,7 +1955,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields" class="doc
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeader" class="doc doc-heading">
-        <code>VcfHeader</code>
+          <code>VcfHeader</code>
 
   
   <span class="doc doc-labels">
@@ -1942,6 +1970,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeader" class="doc doc-heading">
   
       <p>Data obtained from VCF header.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1958,7 +1988,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeader" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>The sample identifier from the VCF header.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The sample identifier from the VCF header.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1968,7 +2002,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeader" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>The assembly information obtained from the VCF header.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The assembly information obtained from the VCF header.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1978,7 +2016,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeader" class="doc doc-heading">
           <td>
                 <code>bool</code>
           </td>
-          <td><p>A boolean indicating whether the VCF denotes chromosomes as chr or not.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A boolean indicating whether the VCF denotes chromosomes as chr or not.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1986,10 +2028,9 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeader" class="doc doc-heading">
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">78</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">78</span>
 <span class="normal">79</span>
 <span class="normal">80</span>
 <span class="normal">81</span>
@@ -2015,7 +2056,7 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeader" class="doc doc-heading">
     <span class="n">assembly</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">chr_status</span><span class="p">:</span> <span class="nb">bool</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2035,6 +2076,7 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeader" class="doc doc-heading">
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -2042,7 +2084,7 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeader" class="doc doc-heading">
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser" class="doc doc-heading">
-        <code>VcfHeaderParser</code>
+          <code>VcfHeaderParser</code>
 
 
 </h2>
@@ -2053,10 +2095,9 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser" class="doc doc-hea
   
       <p>Class for parsing the header of a VCF file.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">112</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">112</span>
 <span class="normal">113</span>
 <span class="normal">114</span>
 <span class="normal">115</span>
@@ -2178,7 +2219,7 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser" class="doc doc-hea
         <span class="n">sample_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_sample_id</span><span class="p">()</span>
         <span class="k">return</span> <span class="n">VcfHeader</span><span class="p">(</span><span class="n">sample_id</span><span class="p">,</span> <span class="n">assembly</span><span class="p">,</span> <span class="n">chr_status</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2192,12 +2233,13 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser" class="doc doc-hea
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">vcf_contents</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">vcf_contents</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2206,6 +2248,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.__init__" class="do
   
       <p>Initialise the VcfHeaderParser.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2222,7 +2266,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.__init__" class="do
           <td>
                 <code>list[str]</code>
           </td>
-          <td><p>The contents of the VCF file as a list of strings.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The contents of the VCF file as a list of strings.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2230,9 +2278,9 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.__init__" class="do
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">115</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">115</span>
 <span class="normal">116</span>
 <span class="normal">117</span>
 <span class="normal">118</span>
@@ -2248,17 +2296,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.__init__" class="do
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">vcf_contents</span> <span class="o">=</span> <span class="n">vcf_contents</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_assembly" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">parse_assembly</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">parse_assembly</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2267,6 +2316,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_assembly" cla
   
       <p>Parse the genome assembly and format of vcf_records.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2280,14 +2331,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_assembly" cla
           <td>
                 <code>tuple[str, bool]</code>
           </td>
-          <td><p>Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False).</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">124</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">124</span>
 <span class="normal">125</span>
 <span class="normal">126</span>
 <span class="normal">127</span>
@@ -2341,17 +2396,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_assembly" cla
     <span class="n">assembly</span> <span class="o">=</span> <span class="p">[</span><span class="n">k</span> <span class="k">for</span> <span class="n">k</span><span class="p">,</span> <span class="n">v</span> <span class="ow">in</span> <span class="n">genome_assemblies</span><span class="o">.</span><span class="n">items</span><span class="p">()</span> <span class="k">if</span> <span class="n">v</span> <span class="o">==</span> <span class="n">vcf_assembly</span><span class="p">][</span><span class="mi">0</span><span class="p">]</span>
     <span class="k">return</span> <span class="n">assembly</span><span class="p">,</span> <span class="n">chr_status</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_sample_id" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">parse_sample_id</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">parse_sample_id</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2360,6 +2416,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_sample_id" cl
   
       <p>Parse the sample ID of the VCF.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2373,14 +2431,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_sample_id" cl
 <td><code>str</code></td>          <td>
                 <code>str</code>
           </td>
-          <td><p>The sample ID extracted from the VCF header.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The sample ID extracted from the VCF header.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">152</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">152</span>
 <span class="normal">153</span>
 <span class="normal">154</span>
 <span class="normal">155</span>
@@ -2400,17 +2462,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_sample_id" cl
         <span class="k">if</span> <span class="n">line</span><span class="o">.</span><span class="n">startswith</span><span class="p">(</span><span class="s2">&quot;#CHROM&quot;</span><span class="p">):</span>
             <span class="k">return</span> <span class="n">line</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot;</span><span class="se">\t</span><span class="s2">&quot;</span><span class="p">)[</span><span class="mi">9</span><span class="p">]</span><span class="o">.</span><span class="n">rstrip</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">parse_vcf_header</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">parse_vcf_header</span><span class="p">()</span></code>
 
 </h3>
 
@@ -2419,6 +2482,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header" c
   
       <p>Parse the header of the VCF.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2432,14 +2497,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header" c
 <td><code>VcfHeader</code></td>          <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfHeader" href="#src.pheval.prepare.create_spiked_vcf.VcfHeader">VcfHeader</a></code>
           </td>
-          <td><p>An instance of VcfHeader containing sample ID, assembly, and chromosome status.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>An instance of VcfHeader containing sample ID, assembly, and chromosome status.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">163</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">163</span>
 <span class="normal">164</span>
 <span class="normal">165</span>
 <span class="normal">166</span>
@@ -2459,7 +2528,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header" c
     <span class="n">sample_id</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">parse_sample_id</span><span class="p">()</span>
     <span class="k">return</span> <span class="n">VcfHeader</span><span class="p">(</span><span class="n">sample_id</span><span class="p">,</span> <span class="n">assembly</span><span class="p">,</span> <span class="n">chr_status</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2470,6 +2539,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header" c
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -2477,7 +2547,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header" c
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker" class="doc doc-heading">
-        <code>VcfSpiker</code>
+          <code>VcfSpiker</code>
 
 
 </h2>
@@ -2488,10 +2558,9 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker" class="doc doc-heading">
   
       <p>Class for spiking proband variants into template VCF file contents.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">284</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">284</span>
 <span class="normal">285</span>
 <span class="normal">286</span>
 <span class="normal">287</span>
@@ -2731,7 +2800,7 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker" class="doc doc-heading">
 <span class="sd">        &quot;&quot;&quot;</span>
         <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">construct_header</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">construct_vcf_records</span><span class="p">(</span><span class="n">template_vcf_name</span><span class="p">))</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2745,12 +2814,13 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker" class="doc doc-heading">
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">vcf_contents</span><span class="p">,</span> <span class="n">proband_causative_variants</span><span class="p">,</span> <span class="n">vcf_header</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">vcf_contents</span><span class="p">,</span> <span class="n">proband_causative_variants</span><span class="p">,</span> <span class="n">vcf_header</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2759,6 +2829,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__" class="doc doc-
   
       <p>Initialise the VcfSpiker.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2775,7 +2847,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__" class="doc doc-
           <td>
                 <code><span title="typing.List">List</span>[str]</code>
           </td>
-          <td><p>Contents of the template VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Contents of the template VCF file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2785,7 +2861,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__" class="doc doc-
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.utils.phenopacket_utils.ProbandCausativeVariant">ProbandCausativeVariant</span>]</code>
           </td>
-          <td><p>List of proband causative variants.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of proband causative variants.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2795,7 +2875,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__" class="doc doc-
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfHeader" href="#src.pheval.prepare.create_spiked_vcf.VcfHeader">VcfHeader</a></code>
           </td>
-          <td><p>The VCF header information.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The VCF header information.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2803,9 +2887,9 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__" class="doc doc-
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">287</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">287</span>
 <span class="normal">288</span>
 <span class="normal">289</span>
 <span class="normal">290</span>
@@ -2839,17 +2923,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__" class="doc doc-
     <span class="bp">self</span><span class="o">.</span><span class="n">proband_causative_variants</span> <span class="o">=</span> <span class="n">proband_causative_variants</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">vcf_header</span> <span class="o">=</span> <span class="n">vcf_header</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">construct_header</span><span class="p">(</span><span class="n">updated_vcf_records</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">construct_header</span><span class="p">(</span><span class="n">updated_vcf_records</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2858,6 +2943,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header" class="
   
       <p>Construct the header of the VCF.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2874,7 +2961,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header" class="
           <td>
                 <code><span title="typing.List">List</span>[str]</code>
           </td>
-          <td><p>Updated VCF records.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Updated VCF records.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2882,6 +2973,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header" class="
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2895,14 +2988,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header" class="
           <td>
                 <code><span title="typing.List">List</span>[str]</code>
           </td>
-          <td><p>List[str]: Constructed header as a list of strings.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[str]: Constructed header as a list of strings.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">371</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">371</span>
 <span class="normal">372</span>
 <span class="normal">373</span>
 <span class="normal">374</span>
@@ -2944,17 +3041,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header" class="
         <span class="n">updated_vcf_file</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">text</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">updated_vcf_file</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">construct_variant_entry</span><span class="p">(</span><span class="n">proband_variant_data</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">construct_variant_entry</span><span class="p">(</span><span class="n">proband_variant_data</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2963,6 +3061,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry"
   
       <p>Construct variant entries.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2979,7 +3079,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry"
           <td>
                 <code><span title="pheval.utils.phenopacket_utils.ProbandCausativeVariant">ProbandCausativeVariant</span></code>
           </td>
-          <td><p>Data for the proband variant.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Data for the proband variant.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2987,6 +3091,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry"
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3000,14 +3106,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry"
           <td>
                 <code><span title="typing.List">List</span>[str]</code>
           </td>
-          <td><p>List[str]: Constructed variant entry as a list of strings.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[str]: Constructed variant entry as a list of strings.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">305</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">305</span>
 <span class="normal">306</span>
 <span class="normal">307</span>
 <span class="normal">308</span>
@@ -3075,17 +3185,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry"
         <span class="n">genotype_codes</span><span class="p">[</span><span class="n">proband_variant_data</span><span class="o">.</span><span class="n">genotype</span><span class="o">.</span><span class="n">lower</span><span class="p">()]</span> <span class="o">+</span> <span class="s2">&quot;</span><span class="se">\n</span><span class="s2">&quot;</span><span class="p">,</span>
     <span class="p">]</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">construct_vcf</span><span class="p">(</span><span class="n">template_vcf_name</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">construct_vcf</span><span class="p">(</span><span class="n">template_vcf_name</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3094,6 +3205,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf" class="doc
   
       <p>Construct the entire spiked VCF file by incorporating the spiked variants into the VCF.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3110,7 +3223,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf" class="doc
           <td>
                 <code>str</code>
           </td>
-          <td><p>Name of the template VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Name of the template VCF file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3118,6 +3235,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf" class="doc
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3131,14 +3250,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf" class="doc
           <td>
                 <code><span title="typing.List">List</span>[str]</code>
           </td>
-          <td><p>List[str]: The complete spiked VCF file content as a list of strings.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[str]: The complete spiked VCF file content as a list of strings.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">393</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">393</span>
 <span class="normal">394</span>
 <span class="normal">395</span>
 <span class="normal">396</span>
@@ -3160,17 +3283,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf" class="doc
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">construct_header</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">construct_vcf_records</span><span class="p">(</span><span class="n">template_vcf_name</span><span class="p">))</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">construct_vcf_records</span><span class="p">(</span><span class="n">template_vcf_name</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">construct_vcf_records</span><span class="p">(</span><span class="n">template_vcf_name</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3179,6 +3303,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" cl
   
       <p>Construct updated VCF records by inserting spiked variants into the correct positions within the VCF.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3195,7 +3321,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" cl
           <td>
                 <code>str</code>
           </td>
-          <td><p>Name of the template VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Name of the template VCF file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3203,6 +3333,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" cl
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3216,14 +3348,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" cl
           <td>
                 <code><span title="typing.List">List</span>[str]</code>
           </td>
-          <td><p>List[str]: Updated VCF records containing the spiked variants.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[str]: Updated VCF records containing the spiked variants.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">340</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">340</span>
 <span class="normal">341</span>
 <span class="normal">342</span>
 <span class="normal">343</span>
@@ -3283,7 +3419,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" cl
         <span class="n">updated_vcf_records</span><span class="o">.</span><span class="n">insert</span><span class="p">(</span><span class="n">variant_entry_position</span><span class="p">,</span> <span class="s2">&quot;</span><span class="se">\t</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">variant_entry</span><span class="p">))</span>
     <span class="k">return</span> <span class="n">updated_vcf_records</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -3294,6 +3430,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" cl
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -3301,7 +3438,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records" cl
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.VcfWriter" class="doc doc-heading">
-        <code>VcfWriter</code>
+          <code>VcfWriter</code>
 
 
 </h2>
@@ -3312,10 +3449,9 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfWriter" class="doc doc-heading">
   
       <p>Class for writing VCF file.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">406</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">406</span>
 <span class="normal">407</span>
 <span class="normal">408</span>
 <span class="normal">409</span>
@@ -3403,7 +3539,7 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfWriter" class="doc doc-heading">
 <span class="sd">        &quot;&quot;&quot;</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">write_gzip</span><span class="p">()</span> <span class="k">if</span> <span class="n">is_gzipped</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spiked_vcf_file_path</span><span class="p">)</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_uncompressed</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -3417,12 +3553,13 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.VcfWriter" class="doc doc-heading">
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">vcf_contents</span><span class="p">,</span> <span class="n">spiked_vcf_file_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">vcf_contents</span><span class="p">,</span> <span class="n">spiked_vcf_file_path</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3431,6 +3568,8 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__" class="doc doc-
   
       <p>Initialise the VcfWriter class.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3447,7 +3586,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__" class="doc doc-
           <td>
                 <code><span title="typing.List">List</span>[str]</code>
           </td>
-          <td><p>Contents of the VCF file to be written.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Contents of the VCF file to be written.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3457,7 +3600,11 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the spiked VCF file to be created.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the spiked VCF file to be created.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3465,9 +3612,9 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__" class="doc doc-
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">409</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">409</span>
 <span class="normal">410</span>
 <span class="normal">411</span>
 <span class="normal">412</span>
@@ -3495,17 +3642,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__" class="doc doc-
     <span class="bp">self</span><span class="o">.</span><span class="n">vcf_contents</span> <span class="o">=</span> <span class="n">vcf_contents</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">spiked_vcf_file_path</span> <span class="o">=</span> <span class="n">spiked_vcf_file_path</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_gzip" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">write_gzip</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">write_gzip</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3514,9 +3662,9 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_gzip" class="doc do
   
       <p>Write the VCF contents to a gzipped VCF file.</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">424</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">424</span>
 <span class="normal">425</span>
 <span class="normal">426</span>
 <span class="normal">427</span>
@@ -3534,17 +3682,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_gzip" class="doc do
             <span class="n">f</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">line</span><span class="p">)</span>
     <span class="n">f</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_uncompressed" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">write_uncompressed</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">write_uncompressed</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3553,9 +3702,9 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_uncompressed" class
   
       <p>Write the VCF contents to an uncompressed VCF file.</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">434</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">434</span>
 <span class="normal">435</span>
 <span class="normal">436</span>
 <span class="normal">437</span>
@@ -3569,17 +3718,18 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_uncompressed" class
         <span class="n">file</span><span class="o">.</span><span class="n">writelines</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">vcf_contents</span><span class="p">)</span>
     <span class="n">file</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_vcf_file" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">write_vcf_file</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">write_vcf_file</span><span class="p">()</span></code>
 
 </h3>
 
@@ -3590,9 +3740,9 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_vcf_file" class="do
 <p>Determines the file writing method based on the compression type of the spiked VCF file path.
 Writes the VCF contents to the corresponding file format (gzip or uncompressed).</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">442</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">442</span>
 <span class="normal">443</span>
 <span class="normal">444</span>
 <span class="normal">445</span>
@@ -3608,7 +3758,7 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_vcf_file" class="do
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">write_gzip</span><span class="p">()</span> <span class="k">if</span> <span class="n">is_gzipped</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">spiked_vcf_file_path</span><span class="p">)</span> <span class="k">else</span> <span class="bp">self</span><span class="o">.</span><span class="n">write_uncompressed</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -3619,15 +3769,17 @@ <h3 id="src.pheval.prepare.create_spiked_vcf.VcfWriter.write_vcf_file" class="do
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">check_variant_assembly</span><span class="p">(</span><span class="n">proband_causative_variants</span><span class="p">,</span> <span class="n">vcf_header</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">check_variant_assembly</span><span class="p">(</span><span class="n">proband_causative_variants</span><span class="p">,</span> <span class="n">vcf_header</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">)</span></code>
 
 </h2>
 
@@ -3636,6 +3788,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="doc
   
       <p>Check the assembly of the variant assembly against the VCF.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3652,7 +3806,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="doc
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.utils.phenopacket_utils.ProbandCausativeVariant">ProbandCausativeVariant</span>]</code>
           </td>
-          <td><p>A list of causative variants from the proband.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A list of causative variants from the proband.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3662,7 +3820,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="doc
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfHeader" href="#src.pheval.prepare.create_spiked_vcf.VcfHeader">VcfHeader</a></code>
           </td>
-          <td><p>An instance of VcfHeader representing the VCF file's header.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>An instance of VcfHeader representing the VCF file's header.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3672,7 +3834,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="doc
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3680,6 +3846,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="doc
     </tbody>
   </table>
 
+
+
   <p><strong>Raises:</strong></p>
   <table>
     <thead>
@@ -3693,20 +3861,28 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="doc
           <td>
                 <code>ValueError</code>
           </td>
-          <td><p>If there are too many or incompatible genome assemblies found.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If there are too many or incompatible genome assemblies found.</p>
+            </div>
+          </td>
         </tr>
         <tr>
           <td>
                 <code><span title="pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError">IncompatibleGenomeAssemblyError</span></code>
           </td>
-          <td><p>If the assembly in the Phenopacket does not match the VCF assembly.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If the assembly in the Phenopacket does not match the VCF assembly.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">249</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">249</span>
 <span class="normal">250</span>
 <span class="normal">251</span>
 <span class="normal">252</span>
@@ -3772,17 +3948,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.check_variant_assembly" class="doc
             <span class="n">assembly</span><span class="o">=</span><span class="n">phenopacket_assembly</span><span class="p">,</span> <span class="n">phenopacket</span><span class="o">=</span><span class="n">phenopacket_path</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_spiked_vcf</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">hg19_template_vcf</span><span class="p">,</span> <span class="n">hg38_template_vcf</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">create_spiked_vcf</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">hg19_template_vcf</span><span class="p">,</span> <span class="n">hg38_template_vcf</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -3791,6 +3968,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
   
       <p>Create a spiked VCF for a Phenopacket.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3807,7 +3986,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory to store the generated spiked VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory to store the generated spiked VCF file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3817,7 +4000,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3827,7 +4014,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the hg19 template VCF file (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the hg19 template VCF file (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3837,7 +4028,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the hg38 template VCF file (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the hg38 template VCF file (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3847,7 +4042,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg19 VCF files (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg19 VCF files (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3857,7 +4056,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg38 VCF files (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg38 VCF files (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3865,6 +4068,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
     </tbody>
   </table>
 
+
+
   <p><strong>Raises:</strong></p>
   <table>
     <thead>
@@ -3878,14 +4083,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
           <td>
                 <code><span title="pheval.prepare.custom_exceptions.InputError">InputError</span></code>
           </td>
-          <td><p>If both hg19_template_vcf and hg38_template_vcf are None.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If both hg19_template_vcf and hg38_template_vcf are None.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">573</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">573</span>
 <span class="normal">574</span>
 <span class="normal">575</span>
 <span class="normal">576</span>
@@ -3943,17 +4152,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcf" class="doc doc-h
         <span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_spiked_vcfs</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">hg19_template_vcf</span><span class="p">,</span> <span class="n">hg38_template_vcf</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">create_spiked_vcfs</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">hg19_template_vcf</span><span class="p">,</span> <span class="n">hg38_template_vcf</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -3962,6 +4172,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
   
       <p>Create a spiked VCF for a directory of Phenopackets.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3978,7 +4190,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory to store the generated spiked VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory to store the generated spiked VCF file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3988,7 +4204,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the Phenopacket directory.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the Phenopacket directory.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3998,7 +4218,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the template hg19 VCF file (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the template hg19 VCF file (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4008,7 +4232,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the template hg19 VCF file (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the template hg38 VCF file (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4018,7 +4246,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg19 VCF files (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg19 VCF files (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4028,7 +4260,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg38 VCF files (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg38 VCF files (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4036,6 +4272,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
     </tbody>
   </table>
 
+
+
   <p><strong>Raises:</strong></p>
   <table>
     <thead>
@@ -4049,14 +4287,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
           <td>
                 <code><span title="pheval.prepare.custom_exceptions.InputError">InputError</span></code>
           </td>
-          <td><p>If both hg19_template_vcf and hg38_template_vcf are None.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If both hg19_template_vcf and hg38_template_vcf are None.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">604</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">604</span>
 <span class="normal">605</span>
 <span class="normal">606</span>
 <span class="normal">607</span>
@@ -4126,17 +4368,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs" class="doc doc-
             <span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">generate_spiked_vcf_file</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">generate_spiked_vcf_file</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -4145,6 +4388,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="do
   
       <p>Write spiked VCF contents to a new file.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -4161,7 +4406,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="do
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the directory to store the generated file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the directory to store the generated file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4171,7 +4420,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="do
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>Phenopacket or Family containing causative variants.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Phenopacket or Family containing causative variants.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4181,7 +4434,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="do
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4191,7 +4448,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="do
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfFile" href="#src.pheval.prepare.create_spiked_vcf.VcfFile">VcfFile</a></code>
           </td>
-          <td><p>VCF file info for hg19 template vcf.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>VCF file info for hg19 template vcf.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4201,7 +4462,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="do
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfFile" href="#src.pheval.prepare.create_spiked_vcf.VcfFile">VcfFile</a></code>
           </td>
-          <td><p>VCF file info for hg38 template vcf.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>VCF file info for hg38 template vcf.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4211,7 +4476,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="do
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg19 VCF files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg19 VCF files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4221,35 +4490,23 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="do
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg38 VCF files.</p></td>
           <td>
-              <em>required</em>
+            <div class="doc-md-description">
+              <p>The directory containing the hg38 VCF files.</p>
+            </div>
           </td>
-        </tr>
-    </tbody>
-  </table>
-
-  <p><strong>Returns:</strong></p>
-  <table>
-    <thead>
-      <tr>
-<th>Name</th>        <th>Type</th>
-        <th>Description</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-<td><code>File</code></td>          <td>
-                <code><span title="phenopackets.File">File</span></code>
+          <td>
+              <em>required</em>
           </td>
-          <td><p>The generated File object representing the newly created spiked VCF file.</p></td>
         </tr>
     </tbody>
   </table>
+      <p><strong>Returns:</strong></p>
+      <p><code>File</code>: The generated File object representing the newly created spiked VCF file.</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">498</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">498</span>
 <span class="normal">499</span>
 <span class="normal">500</span>
 <span class="normal">501</span>
@@ -4317,17 +4574,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file" class="do
         <span class="n">file_attributes</span><span class="o">=</span><span class="p">{</span><span class="s2">&quot;fileFormat&quot;</span><span class="p">:</span> <span class="s2">&quot;vcf&quot;</span><span class="p">,</span> <span class="s2">&quot;genomeAssembly&quot;</span><span class="p">:</span> <span class="n">vcf_assembly</span><span class="p">},</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.read_vcf" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">read_vcf</span><span class="p">(</span><span class="n">vcf_file</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">read_vcf</span><span class="p">(</span><span class="n">vcf_file</span><span class="p">)</span></code>
 
 </h2>
 
@@ -4336,6 +4594,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.read_vcf" class="doc doc-heading">
   
       <p>Read the contents of a VCF file into memory, handling both uncompressed and gzipped files.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -4352,7 +4612,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.read_vcf" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the VCF file to be read.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the VCF file to be read.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4360,6 +4624,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.read_vcf" class="doc doc-heading">
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4373,14 +4639,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.read_vcf" class="doc doc-heading">
           <td>
                 <code><span title="typing.List">List</span>[str]</code>
           </td>
-          <td><p>List[str]: A list containing the lines of the VCF file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[str]: A list containing the lines of the VCF file.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 93</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 93</span>
 <span class="normal"> 94</span>
 <span class="normal"> 95</span>
 <span class="normal"> 96</span>
@@ -4414,17 +4684,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.read_vcf" class="doc doc-heading">
     <span class="n">vcf</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
     <span class="k">return</span> <span class="n">vcf_contents</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">select_vcf_template</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">proband_causative_variants</span><span class="p">,</span> <span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">select_vcf_template</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">proband_causative_variants</span><span class="p">,</span> <span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -4433,6 +4704,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
   
       <p>Select the appropriate VCF template based on the assembly information of the proband causative variants.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -4449,7 +4722,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4459,7 +4736,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
           <td>
                 <code><span title="typing.List">List</span>[<span title="pheval.utils.phenopacket_utils.ProbandCausativeVariant">ProbandCausativeVariant</span>]</code>
           </td>
-          <td><p>A list of causative variants from the proband.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A list of causative variants from the proband.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4469,7 +4750,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfFile" href="#src.pheval.prepare.create_spiked_vcf.VcfFile">VcfFile</a></code>
           </td>
-          <td><p>VCF file info for hg19 template vcf.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>VCF file info for hg19 template vcf.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4479,7 +4764,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfFile" href="#src.pheval.prepare.create_spiked_vcf.VcfFile">VcfFile</a></code>
           </td>
-          <td><p>CF file info for hg38 template vcf.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>VCF file info for hg38 template vcf.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4489,7 +4778,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg19 VCF files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg19 VCF files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4499,7 +4792,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg38 VCF files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg38 VCF files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4507,6 +4804,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4520,14 +4819,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
 <td><code>VcfFile</code></td>          <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfFile" href="#src.pheval.prepare.create_spiked_vcf.VcfFile">VcfFile</a></code>
           </td>
-          <td><p>The selected VCF template file based on the assembly information of the proband causative variants.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The selected VCF template file based on the assembly information of the proband causative variants.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">206</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">206</span>
 <span class="normal">207</span>
 <span class="normal">208</span>
 <span class="normal">209</span>
@@ -4609,17 +4912,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.select_vcf_template" class="doc doc
             <span class="n">proband_causative_variants</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">assembly</span><span class="p">,</span> <span class="n">phenopacket_path</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">spike_and_update_phenopacket</span><span class="p">(</span><span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">spike_and_update_phenopacket</span><span class="p">(</span><span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">)</span></code>
 
 </h2>
 
@@ -4629,6 +4933,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
       <p>Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket
 accordingly, and write the updated Phenopacket to the specified output directory.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -4645,7 +4951,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfFile" href="#src.pheval.prepare.create_spiked_vcf.VcfFile">VcfFile</a></code>
           </td>
-          <td><p>VCF file info for hg19 template vcf.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>VCF file info for hg19 template vcf.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4655,7 +4965,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfFile" href="#src.pheval.prepare.create_spiked_vcf.VcfFile">VcfFile</a></code>
           </td>
-          <td><p>VCF file info for hg38 template vcf.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>VCF file info for hg38 template vcf.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4665,7 +4979,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg19 VCF files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg19 VCF files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4675,7 +4993,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg38 VCF files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg38 VCF files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4685,7 +5007,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Directory where the updated Phenopacket will be saved.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Directory where the updated Phenopacket will be saved.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4695,7 +5021,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the original Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the original Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4703,6 +5033,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4716,14 +5048,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
           <td>
                 <code>None</code>
           </td>
-          <td><p>None</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>None</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">534</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">534</span>
 <span class="normal">535</span>
 <span class="normal">536</span>
 <span class="normal">537</span>
@@ -4797,17 +5133,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket" class
     <span class="p">)</span>
     <span class="n">write_phenopacket</span><span class="p">(</span><span class="n">updated_phenopacket</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">spike_vcf_contents</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">spike_vcf_contents</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">hg19_vcf_info</span><span class="p">,</span> <span class="n">hg38_vcf_info</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -4816,6 +5153,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
   
       <p>Spike VCF records with variants obtained from a Phenopacket or Family.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -4832,7 +5171,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>Phenopacket or Family containing causative variants.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Phenopacket or Family containing causative variants.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4842,7 +5185,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4852,7 +5199,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfFile" href="#src.pheval.prepare.create_spiked_vcf.VcfFile">VcfFile</a></code>
           </td>
-          <td><p>VCF file info for hg19 template vcf.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>VCF file info for hg19 template vcf.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4862,7 +5213,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.prepare.create_spiked_vcf.VcfFile" href="#src.pheval.prepare.create_spiked_vcf.VcfFile">VcfFile</a></code>
           </td>
-          <td><p>VCF file info for hg38 template vcf.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>VCF file info for hg38 template vcf.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4872,7 +5227,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg19 VCF files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg19 VCF files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4882,7 +5241,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg38 VCF files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg38 VCF files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4890,6 +5253,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4903,16 +5268,20 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
           <td>
                 <code>tuple[str, <span title="typing.List">List</span>[str]]</code>
           </td>
-          <td><p>A tuple containing:
+          <td>
+            <div class="doc-md-description">
+              <p>A tuple containing:
 assembly (str): The genome assembly information extracted from VCF header.
-modified_vcf_contents (List[str]): Modified VCF records with spiked variants.</p></td>
+modified_vcf_contents (List[str]): Modified VCF records with spiked variants.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">452</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">452</span>
 <span class="normal">453</span>
 <span class="normal">454</span>
 <span class="normal">455</span>
@@ -5000,17 +5369,18 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcf_contents" class="doc doc-
         <span class="p">)</span><span class="o">.</span><span class="n">construct_vcf</span><span class="p">(</span><span class="n">chosen_template_vcf</span><span class="o">.</span><span class="n">vcf_file_name</span><span class="p">),</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">spike_vcfs</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">hg19_template_vcf</span><span class="p">,</span> <span class="n">hg38_template_vcf</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">spike_vcfs</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">hg19_template_vcf</span><span class="p">,</span> <span class="n">hg38_template_vcf</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -5019,6 +5389,8 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
   
       <p>Create spiked VCF from either a Phenopacket or a Phenopacket directory.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -5035,7 +5407,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory to store the generated spiked VCF file(s).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory to store the generated spiked VCF file(s).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5045,7 +5421,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to a single Phenopacket file (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to a single Phenopacket file (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5055,7 +5435,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to a directory containing Phenopacket files (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to a directory containing Phenopacket files (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5065,7 +5449,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the hg19 template VCF file (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the hg19 template VCF file (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5075,7 +5463,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the hg38 template VCF file (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the hg38 template VCF file (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5085,7 +5477,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg19 VCF files (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg19 VCF files (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5095,7 +5491,11 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing the hg38 VCF files (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing the hg38 VCF files (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5103,9 +5503,9 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">641</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/create_spiked_vcf.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">641</span>
 <span class="normal">642</span>
 <span class="normal">643</span>
 <span class="normal">644</span>
@@ -5183,7 +5583,7 @@ <h2 id="src.pheval.prepare.create_spiked_vcf.spike_vcfs" class="doc doc-heading"
             <span class="n">hg38_vcf_dir</span><span class="p">,</span>
         <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/prepare/custom_exceptions/index.html b/api/pheval/prepare/custom_exceptions/index.html
index e58649b1c..0c41104cb 100644
--- a/api/pheval/prepare/custom_exceptions/index.html
+++ b/api/pheval/prepare/custom_exceptions/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/prepare/custom_exceptions/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -856,7 +856,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.custom_exceptions" class="md-nav__link">
-    src.pheval.prepare.custom_exceptions
+    custom_exceptions
   </a>
   
 </li>
@@ -1221,7 +1221,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.custom_exceptions" class="md-nav__link">
-    src.pheval.prepare.custom_exceptions
+    custom_exceptions
   </a>
   
 </li>
@@ -1265,6 +1265,7 @@ <h1>Custom exceptions</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.prepare.custom_exceptions"></a>
   <div class="doc doc-contents first">
 
@@ -1284,23 +1285,22 @@ <h1>Custom exceptions</h1>
 
 
 <h2 id="src.pheval.prepare.custom_exceptions.InputError" class="doc doc-heading">
-        <code>InputError</code>
+          <code>InputError</code>
 
 
 </h2>
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code>Exception</code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code>Exception</code></p>
 
   
       <p>Exception raised for missing required inputs.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/prepare/custom_exceptions.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 4</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/prepare/custom_exceptions.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 4</span>
 <span class="normal"> 5</span>
 <span class="normal"> 6</span>
 <span class="normal"> 7</span>
@@ -1320,7 +1320,7 @@ <h2 id="src.pheval.prepare.custom_exceptions.InputError" class="doc doc-heading"
     <span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
         <span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">message</span><span class="si">}</span><span class="s2"> -&gt; </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">file</span><span class="si">}</span><span class="s2"> &quot;</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1340,6 +1340,7 @@ <h2 id="src.pheval.prepare.custom_exceptions.InputError" class="doc doc-heading"
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1347,23 +1348,22 @@ <h2 id="src.pheval.prepare.custom_exceptions.InputError" class="doc doc-heading"
 
 
 <h2 id="src.pheval.prepare.custom_exceptions.MutuallyExclusiveOptionError" class="doc doc-heading">
-        <code>MutuallyExclusiveOptionError</code>
+          <code>MutuallyExclusiveOptionError</code>
 
 
 </h2>
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><span title="click.Option">Option</span></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><span title="click.Option">Option</span></code></p>
 
   
       <p>Exception raised for when</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/prepare/custom_exceptions.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">16</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/prepare/custom_exceptions.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">16</span>
 <span class="normal">17</span>
 <span class="normal">18</span>
 <span class="normal">19</span>
@@ -1405,7 +1405,7 @@ <h2 id="src.pheval.prepare.custom_exceptions.MutuallyExclusiveOptionError" class
 
         <span class="k">return</span> <span class="nb">super</span><span class="p">(</span><span class="n">MutuallyExclusiveOptionError</span><span class="p">,</span> <span class="bp">self</span><span class="p">)</span><span class="o">.</span><span class="n">handle_parse_result</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span> <span class="n">opts</span><span class="p">,</span> <span class="n">args</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1425,6 +1425,7 @@ <h2 id="src.pheval.prepare.custom_exceptions.MutuallyExclusiveOptionError" class
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/prepare/prepare_corpus/index.html b/api/pheval/prepare/prepare_corpus/index.html
index a61f0c6fc..cc81d0b34 100644
--- a/api/pheval/prepare/prepare_corpus/index.html
+++ b/api/pheval/prepare/prepare_corpus/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/prepare/prepare_corpus/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -870,14 +870,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.prepare_corpus" class="md-nav__link">
-    src.pheval.prepare.prepare_corpus
+    prepare_corpus
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.prepare_corpus.prepare_corpus" class="md-nav__link">
-    prepare_corpus()
+    prepare_corpus
   </a>
   
 </li>
@@ -1214,14 +1214,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.prepare_corpus" class="md-nav__link">
-    src.pheval.prepare.prepare_corpus
+    prepare_corpus
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.prepare_corpus.prepare_corpus" class="md-nav__link">
-    prepare_corpus()
+    prepare_corpus
   </a>
   
 </li>
@@ -1251,6 +1251,7 @@ <h1>Prepare corpus</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.prepare.prepare_corpus"></a>
   <div class="doc doc-contents first">
 
@@ -1266,12 +1267,13 @@ <h1>Prepare corpus</h1>
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">prepare_corpus</span><span class="p">(</span><span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">variant_analysis</span><span class="p">,</span> <span class="n">gene_analysis</span><span class="p">,</span> <span class="n">disease_analysis</span><span class="p">,</span> <span class="n">gene_identifier</span><span class="p">,</span> <span class="n">hg19_template_vcf</span><span class="p">,</span> <span class="n">hg38_template_vcf</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">prepare_corpus</span><span class="p">(</span><span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">variant_analysis</span><span class="p">,</span> <span class="n">gene_analysis</span><span class="p">,</span> <span class="n">disease_analysis</span><span class="p">,</span> <span class="n">gene_identifier</span><span class="p">,</span> <span class="n">hg19_template_vcf</span><span class="p">,</span> <span class="n">hg38_template_vcf</span><span class="p">,</span> <span class="n">hg19_vcf_dir</span><span class="p">,</span> <span class="n">hg38_vcf_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1281,6 +1283,8 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
       <p>Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating
 gene identifiers.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1297,7 +1301,11 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the directory containing Phenopackets.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the directory containing Phenopackets.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1307,7 +1315,11 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code>bool</code>
           </td>
-          <td><p>If True, check for complete variant records in the Phenopackets.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If True, check for complete variant records in the Phenopackets.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1317,7 +1329,11 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code>bool</code>
           </td>
-          <td><p>If True, check for complete gene records in the Phenopackets.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If True, check for complete gene records in the Phenopackets.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1327,7 +1343,11 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code>bool</code>
           </td>
-          <td><p>If True, check for complete disease records in the Phenopackets.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If True, check for complete disease records in the Phenopackets.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1337,7 +1357,11 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code>str</code>
           </td>
-          <td><p>Identifier for updating gene identifiers, if applicable.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Identifier for updating gene identifiers, if applicable.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1347,7 +1371,11 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the hg19 template VCF file (optional), to spike variants into</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the hg19 template VCF file (optional), to spike variants into</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1357,7 +1385,11 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the hg38 template VCF file (optional), to spike variants into</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the hg38 template VCF file (optional), to spike variants into</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1367,7 +1399,11 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the directory containing hg19 template VCF files (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the directory containing hg19 template VCF files (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1377,7 +1413,11 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the directory containing hg38 template VCF files (optional).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the directory containing hg38 template VCF files (optional).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1387,22 +1427,24 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory to save the prepared Phenopackets and, optionally, VCF files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory to save the prepared Phenopackets and, optionally, VCF files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
         </tr>
     </tbody>
   </table>
+      <p>Notes:
+    To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf,
+    hg19_vcf_dir or hg38_vcf_dir is required.</p>
 
-<details class="notes">
-  <summary>Notes</summary>
-  <p>To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf,
-hg19_vcf_dir or hg38_vcf_dir is required.</p>
-</details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/prepare_corpus.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">13</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/prepare_corpus.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">13</span>
 <span class="normal">14</span>
 <span class="normal">15</span>
 <span class="normal">16</span>
@@ -1558,7 +1600,7 @@ <h2 id="src.pheval.prepare.prepare_corpus.prepare_corpus" class="doc doc-heading
                 <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">output_dir</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;phenopackets/</span><span class="si">{</span><span class="n">phenopacket_path</span><span class="o">.</span><span class="n">name</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
             <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/prepare/update_phenopacket/index.html b/api/pheval/prepare/update_phenopacket/index.html
index 6acec94cf..746404291 100644
--- a/api/pheval/prepare/update_phenopacket/index.html
+++ b/api/pheval/prepare/update_phenopacket/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/prepare/update_phenopacket/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -884,35 +884,35 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket" class="md-nav__link">
-    src.pheval.prepare.update_phenopacket
+    update_phenopacket
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket.create_updated_phenopacket" class="md-nav__link">
-    create_updated_phenopacket()
+    create_updated_phenopacket
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket.create_updated_phenopackets" class="md-nav__link">
-    create_updated_phenopackets()
+    create_updated_phenopackets
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket.update_outdated_gene_context" class="md-nav__link">
-    update_outdated_gene_context()
+    update_outdated_gene_context
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket.update_phenopackets" class="md-nav__link">
-    update_phenopackets()
+    update_phenopackets
   </a>
   
 </li>
@@ -1235,35 +1235,35 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket" class="md-nav__link">
-    src.pheval.prepare.update_phenopacket
+    update_phenopacket
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket.create_updated_phenopacket" class="md-nav__link">
-    create_updated_phenopacket()
+    create_updated_phenopacket
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket.create_updated_phenopackets" class="md-nav__link">
-    create_updated_phenopackets()
+    create_updated_phenopackets
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket.update_outdated_gene_context" class="md-nav__link">
-    update_outdated_gene_context()
+    update_outdated_gene_context
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.prepare.update_phenopacket.update_phenopackets" class="md-nav__link">
-    update_phenopackets()
+    update_phenopackets
   </a>
   
 </li>
@@ -1293,6 +1293,7 @@ <h1>Update phenopacket</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.prepare.update_phenopacket"></a>
   <div class="doc doc-contents first">
 
@@ -1308,12 +1309,13 @@ <h1>Update phenopacket</h1>
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopacket" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_updated_phenopacket</span><span class="p">(</span><span class="n">gene_identifier</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">create_updated_phenopacket</span><span class="p">(</span><span class="n">gene_identifier</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1322,6 +1324,8 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopacket" class=
   
       <p>Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1338,7 +1342,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopacket" class=
           <td>
                 <code>str</code>
           </td>
-          <td><p>Identifier used to update the gene context.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Identifier used to update the gene context.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1348,7 +1356,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopacket" class=
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the input Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the input Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1358,23 +1370,25 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopacket" class=
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory where the updated Phenopacket will be written.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory where the updated Phenopacket will be written.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
         </tr>
     </tbody>
   </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
-to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
-to describe the gene identifiers.</p>
-</details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/update_phenopacket.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">45</span>
+      <p>Notes:
+    The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
+    to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
+    to describe the gene identifiers.</p>
+
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/update_phenopacket.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">45</span>
 <span class="normal">46</span>
 <span class="normal">47</span>
 <span class="normal">48</span>
@@ -1410,17 +1424,18 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopacket" class=
     <span class="n">updated_phenopacket</span> <span class="o">=</span> <span class="n">update_outdated_gene_context</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">gene_identifier</span><span class="p">,</span> <span class="n">hgnc_data</span><span class="p">)</span>
     <span class="n">write_phenopacket</span><span class="p">(</span><span class="n">updated_phenopacket</span><span class="p">,</span> <span class="n">output_dir</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="o">.</span><span class="n">name</span><span class="p">))</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopackets" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_updated_phenopackets</span><span class="p">(</span><span class="n">gene_identifier</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">create_updated_phenopackets</span><span class="p">(</span><span class="n">gene_identifier</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1430,6 +1445,8 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopackets" class
       <p>Update the gene context within the interpretations for a directory of Phenopackets
 and writes the updated Phenopackets.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1446,7 +1463,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopackets" class
           <td>
                 <code>str</code>
           </td>
-          <td><p>Identifier used to update the gene context.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Identifier used to update the gene context.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1456,7 +1477,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopackets" class
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the input Phenopacket directory.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the input Phenopacket directory.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1466,23 +1491,25 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopackets" class
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory where the updated Phenopackets will be written.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory where the updated Phenopackets will be written.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
         </tr>
     </tbody>
   </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
-to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
-to describe the gene identifiers.</p>
-</details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/update_phenopacket.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">65</span>
+      <p>Notes:
+    The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
+    to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
+    to describe the gene identifiers.</p>
+
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/update_phenopacket.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">65</span>
 <span class="normal">66</span>
 <span class="normal">67</span>
 <span class="normal">68</span>
@@ -1526,17 +1553,18 @@ <h2 id="src.pheval.prepare.update_phenopacket.create_updated_phenopackets" class
         <span class="p">)</span>
         <span class="n">write_phenopacket</span><span class="p">(</span><span class="n">updated_phenopacket</span><span class="p">,</span> <span class="n">output_dir</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="o">.</span><span class="n">name</span><span class="p">))</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.update_phenopacket.update_outdated_gene_context" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">update_outdated_gene_context</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">gene_identifier</span><span class="p">,</span> <span class="n">hgnc_data</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">update_outdated_gene_context</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">gene_identifier</span><span class="p">,</span> <span class="n">hgnc_data</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1545,6 +1573,8 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_outdated_gene_context" clas
   
       <p>Update the gene context of the Phenopacket.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1561,7 +1591,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_outdated_gene_context" clas
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1571,7 +1605,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_outdated_gene_context" clas
           <td>
                 <code>str</code>
           </td>
-          <td><p>Identifier to update the gene context.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Identifier to update the gene context.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1581,7 +1619,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_outdated_gene_context" clas
           <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>The HGNC data used for updating.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The HGNC data used for updating.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1589,6 +1631,8 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_outdated_gene_context" clas
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1602,21 +1646,23 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_outdated_gene_context" clas
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>Union[Phenopacket, Family]: The updated Phenopacket or Family.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Union[Phenopacket, Family]: The updated Phenopacket or Family.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>This function updates the gene context within the Phenopacket or Family instance.
-The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
-to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
-to describe the gene identifiers.</p>
-</details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/update_phenopacket.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">18</span>
+      <p>Notes:
+    This function updates the gene context within the Phenopacket or Family instance.
+    The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
+    to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
+    to describe the gene identifiers.</p>
+
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/update_phenopacket.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">18</span>
 <span class="normal">19</span>
 <span class="normal">20</span>
 <span class="normal">21</span>
@@ -1666,17 +1712,18 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_outdated_gene_context" clas
     <span class="p">)</span><span class="o">.</span><span class="n">update_genomic_interpretations_gene_identifier</span><span class="p">(</span><span class="n">interpretations</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">PhenopacketRebuilder</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">)</span><span class="o">.</span><span class="n">update_interpretations</span><span class="p">(</span><span class="n">updated_interpretations</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.prepare.update_phenopacket.update_phenopackets" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">update_phenopackets</span><span class="p">(</span><span class="n">gene_identifier</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">update_phenopackets</span><span class="p">(</span><span class="n">gene_identifier</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1685,6 +1732,8 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_phenopackets" class="doc do
   
       <p>Update the gene identifiers in either a single phenopacket or a directory of phenopackets.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1701,7 +1750,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_phenopackets" class="doc do
           <td>
                 <code>str</code>
           </td>
-          <td><p>The gene identifier to be updated.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The gene identifier to be updated.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1711,7 +1764,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_phenopackets" class="doc do
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to a single Phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to a single Phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1721,7 +1778,11 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_phenopackets" class="doc do
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory containing multiple Phenopacket files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory containing multiple Phenopacket files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1731,23 +1792,25 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_phenopackets" class="doc do
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The output directory to save the updated Phenopacket files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The output directory to save the updated Phenopacket files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
         </tr>
     </tbody>
   </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
-to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
-to describe the gene identifiers.</p>
-</details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/prepare/update_phenopacket.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 89</span>
+      <p>Notes:
+    The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
+    to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
+    to describe the gene identifiers.</p>
+
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/prepare/update_phenopacket.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 89</span>
 <span class="normal"> 90</span>
 <span class="normal"> 91</span>
 <span class="normal"> 92</span>
@@ -1789,7 +1852,7 @@ <h2 id="src.pheval.prepare.update_phenopacket.update_phenopackets" class="doc do
     <span class="k">elif</span> <span class="n">phenopacket_dir</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
         <span class="n">create_updated_phenopackets</span><span class="p">(</span><span class="n">gene_identifier</span><span class="p">,</span> <span class="n">phenopacket_dir</span><span class="p">,</span> <span class="n">output_dir</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/run_metadata/index.html b/api/pheval/run_metadata/index.html
index 5eba800e9..8ca3f5a13 100644
--- a/api/pheval/run_metadata/index.html
+++ b/api/pheval/run_metadata/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/run_metadata/">
       
       <link rel="icon" href="../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -449,7 +449,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.run_metadata" class="md-nav__link">
-    src.pheval.run_metadata
+    run_metadata
   </a>
   
 </li>
@@ -1212,7 +1212,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.run_metadata" class="md-nav__link">
-    src.pheval.run_metadata
+    run_metadata
   </a>
   
 </li>
@@ -1249,6 +1249,7 @@ <h1>Run metadata</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.run_metadata"></a>
   <div class="doc doc-contents first">
 
@@ -1268,7 +1269,7 @@ <h1>Run metadata</h1>
 
 
 <h2 id="src.pheval.run_metadata.BasicOutputRunMetaData" class="doc doc-heading">
-        <code>BasicOutputRunMetaData</code>
+          <code>BasicOutputRunMetaData</code>
 
   
   <span class="doc doc-labels">
@@ -1281,87 +1282,19 @@ <h2 id="src.pheval.run_metadata.BasicOutputRunMetaData" class="doc doc-heading">
   <div class="doc doc-contents ">
 
   
-      <p>Class for defining variables for the run metadata.</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>tool</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>Name of the tool implementation</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>tool_version</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>Version of the tool implementation</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>config</code></td>
-          <td>
-                <code><span title="pathlib.Path">Path</span></code>
-          </td>
-          <td><p>Path to the config file located in the input directory</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>run_timestamp</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>Time taken for run to complete</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>corpus</code></td>
-          <td>
-                <code><span title="pathlib.Path">Path</span></code>
-          </td>
-          <td><p>Path to corpus used in pheval run</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>tool_specific_configuration_options</code></td>
-          <td>
-                <code><span title="typing.Any">Any</span></code>
-          </td>
-          <td><p>Special field that can be overwritten by tool implementations to
-                                       contain any extra tool specific configurations used in the run</p></td>
-          <td>
-                <code>None</code>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/run_metadata.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 8</span>
+      <p>Class for defining variables for the run metadata.</p>
+      <p><strong>Parameters:</strong></p>
+      <ul>
+        <li><code>tool</code> (<code>str</code>): Name of the tool implementation</li>
+        <li><code>tool_version</code> (<code>str</code>): Version of the tool implementation</li>
+        <li><code>config</code> (<code>Path</code>): Path to the config file located in the input directory</li>
+        <li><code>run_timestamp</code> (<code>int</code>): Time taken for run to complete</li>
+        <li><code>corpus</code> (<code>Path</code>): Path to corpus used in pheval run</li>
+        <li><code>tool_specific_configuration_options</code> (<code>Any</code>): Special field that can be overwritten by tool implementations to contain any extra tool specific configurations used in the run</li>
+      </ul>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/run_metadata.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 8</span>
 <span class="normal"> 9</span>
 <span class="normal">10</span>
 <span class="normal">11</span>
@@ -1401,7 +1334,7 @@ <h2 id="src.pheval.run_metadata.BasicOutputRunMetaData" class="doc doc-heading">
     <span class="n">corpus</span><span class="p">:</span> <span class="n">Path</span>
     <span class="n">tool_specific_configuration_options</span><span class="p">:</span> <span class="n">Any</span> <span class="o">=</span> <span class="kc">None</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1421,6 +1354,7 @@ <h2 id="src.pheval.run_metadata.BasicOutputRunMetaData" class="doc doc-heading">
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/runners/runner/index.html b/api/pheval/runners/runner/index.html
index 1ed4c458b..08e14ae71 100644
--- a/api/pheval/runners/runner/index.html
+++ b/api/pheval/runners/runner/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/runners/runner/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -931,7 +931,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.runners.runner" class="md-nav__link">
-    src.pheval.runners.runner
+    runner
   </a>
   
 </li>
@@ -953,35 +953,35 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.build_output_directory_structure" class="md-nav__link">
-    build_output_directory_structure()
+    build_output_directory_structure
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.construct_meta_data" class="md-nav__link">
-    construct_meta_data()
+    construct_meta_data
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.post_process" class="md-nav__link">
-    post_process()
+    post_process
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.prepare" class="md-nav__link">
-    prepare()
+    prepare
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.run" class="md-nav__link">
-    run()
+    run
   </a>
   
 </li>
@@ -1262,7 +1262,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.runners.runner" class="md-nav__link">
-    src.pheval.runners.runner
+    runner
   </a>
   
 </li>
@@ -1284,35 +1284,35 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.build_output_directory_structure" class="md-nav__link">
-    build_output_directory_structure()
+    build_output_directory_structure
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.construct_meta_data" class="md-nav__link">
-    construct_meta_data()
+    construct_meta_data
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.post_process" class="md-nav__link">
-    post_process()
+    post_process
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.prepare" class="md-nav__link">
-    prepare()
+    prepare
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner.run" class="md-nav__link">
-    run()
+    run
   </a>
   
 </li>
@@ -1347,6 +1347,7 @@ <h1>Runner</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.runners.runner"></a>
   <div class="doc doc-contents first">
   
@@ -1368,19 +1369,21 @@ <h1>Runner</h1>
 
 
 <h2 id="src.pheval.runners.runner.DefaultPhEvalRunner" class="doc doc-heading">
-        <code>DefaultPhEvalRunner</code>
+          <code>DefaultPhEvalRunner</code>
 
 
 </h2>
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.runners.runner.PhEvalRunner" href="#src.pheval.runners.runner.PhEvalRunner">PhEvalRunner</a></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.runners.runner.PhEvalRunner" href="../../../../developing_a_pheval_plugin/#src.pheval.runners.runner.PhEvalRunner">PhEvalRunner</a></code></p>
 
   
       <p>DefaultPhEvalRunner</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1395,9 +1398,13 @@ <h2 id="src.pheval.runners.runner.DefaultPhEvalRunner" class="doc doc-heading">
         <tr>
           <td><code>PhEvalRunner</code></td>
           <td>
-                <code><a class="autorefs autorefs-internal" title="src.pheval.runners.runner.PhEvalRunner" href="#src.pheval.runners.runner.PhEvalRunner">PhEvalRunner</a></code>
+                <code><a class="autorefs autorefs-internal" title="src.pheval.runners.runner.PhEvalRunner" href="../../../../developing_a_pheval_plugin/#src.pheval.runners.runner.PhEvalRunner">PhEvalRunner</a></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>Abstract PhEvalRunnerClass</p>
+            </div>
           </td>
-          <td><p>Abstract PhEvalRunnerClass</p></td>
           <td>
               <em>required</em>
           </td>
@@ -1405,10 +1412,9 @@ <h2 id="src.pheval.runners.runner.DefaultPhEvalRunner" class="doc doc-heading">
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">130</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">130</span>
 <span class="normal">131</span>
 <span class="normal">132</span>
 <span class="normal">133</span>
@@ -1452,7 +1458,7 @@ <h2 id="src.pheval.runners.runner.DefaultPhEvalRunner" class="doc doc-heading">
     <span class="k">def</span> <span class="nf">post_process</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
         <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;post processing&quot;</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1472,6 +1478,7 @@ <h2 id="src.pheval.runners.runner.DefaultPhEvalRunner" class="doc doc-heading">
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -1479,7 +1486,7 @@ <h2 id="src.pheval.runners.runner.DefaultPhEvalRunner" class="doc doc-heading">
 
 
 <h2 id="src.pheval.runners.runner.PhEvalRunner" class="doc doc-heading">
-        <code>PhEvalRunner</code>
+          <code>PhEvalRunner</code>
 
   
   <span class="doc doc-labels">
@@ -1490,16 +1497,15 @@ <h2 id="src.pheval.runners.runner.PhEvalRunner" class="doc doc-heading">
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code><span title="abc.ABC">ABC</span></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><span title="abc.ABC">ABC</span></code></p>
 
   
       <p>PhEvalRunner Class</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 12</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 12</span>
 <span class="normal"> 13</span>
 <span class="normal"> 14</span>
 <span class="normal"> 15</span>
@@ -1731,7 +1737,7 @@ <h2 id="src.pheval.runners.runner.PhEvalRunner" class="doc doc-heading">
 <span class="w">        </span><span class="sd">&quot;&quot;&quot;Construct run output meta data&quot;&quot;&quot;</span>
         <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">meta_data</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1745,12 +1751,13 @@ <h2 id="src.pheval.runners.runner.PhEvalRunner" class="doc doc-heading">
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.runners.runner.PhEvalRunner.build_output_directory_structure" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">build_output_directory_structure</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">build_output_directory_structure</span><span class="p">()</span></code>
 
 </h3>
 
@@ -1759,9 +1766,9 @@ <h3 id="src.pheval.runners.runner.PhEvalRunner.build_output_directory_structure"
   
       <p>build output directory structure</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">87</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">87</span>
 <span class="normal">88</span>
 <span class="normal">89</span>
 <span class="normal">90</span>
@@ -1781,17 +1788,18 @@ <h3 id="src.pheval.runners.runner.PhEvalRunner.build_output_directory_structure"
     <span class="k">if</span> <span class="bp">self</span><span class="o">.</span><span class="n">_get_disease_analysis</span><span class="p">():</span>
         <span class="bp">self</span><span class="o">.</span><span class="n">pheval_disease_results_dir</span><span class="o">.</span><span class="n">mkdir</span><span class="p">(</span><span class="n">exist_ok</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.runners.runner.PhEvalRunner.construct_meta_data" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">construct_meta_data</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">construct_meta_data</span><span class="p">()</span></code>
 
 </h3>
 
@@ -1800,25 +1808,26 @@ <h3 id="src.pheval.runners.runner.PhEvalRunner.construct_meta_data" class="doc d
   
       <p>Construct run output meta data</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">125</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">125</span>
 <span class="normal">126</span>
 <span class="normal">127</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span> <span class="nf">construct_meta_data</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;Construct run output meta data&quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">meta_data</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.runners.runner.PhEvalRunner.post_process" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">post_process</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">post_process</span><span class="p">()</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
@@ -1831,25 +1840,26 @@ <h3 id="src.pheval.runners.runner.PhEvalRunner.post_process" class="doc doc-head
   
       <p>post_process</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">121</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">121</span>
 <span class="normal">122</span>
 <span class="normal">123</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@abstractmethod</span>
 <span class="k">def</span> <span class="nf">post_process</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;post_process&quot;&quot;&quot;</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.runners.runner.PhEvalRunner.prepare" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">prepare</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">prepare</span><span class="p">()</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
@@ -1862,25 +1872,26 @@ <h3 id="src.pheval.runners.runner.PhEvalRunner.prepare" class="doc doc-heading">
   
       <p>prepare</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">113</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">113</span>
 <span class="normal">114</span>
 <span class="normal">115</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@abstractmethod</span>
 <span class="k">def</span> <span class="nf">prepare</span><span class="p">(</span><span class="bp">self</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">str</span><span class="p">:</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;prepare&quot;&quot;&quot;</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.runners.runner.PhEvalRunner.run" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">run</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">run</span><span class="p">()</span></code>
   
   <span class="doc doc-labels">
       <small class="doc doc-label doc-label-abstractmethod"><code>abstractmethod</code></small>
@@ -1893,15 +1904,15 @@ <h3 id="src.pheval.runners.runner.PhEvalRunner.run" class="doc doc-heading">
   
       <p>run</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">117</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">117</span>
 <span class="normal">118</span>
 <span class="normal">119</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@abstractmethod</span>
 <span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;run&quot;&quot;&quot;</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -1912,6 +1923,7 @@ <h3 id="src.pheval.runners.runner.PhEvalRunner.run" class="doc doc-heading">
 
   </div>
 
+
 </div>
 
 
diff --git a/api/pheval/utils/exomiser/index.html b/api/pheval/utils/exomiser/index.html
index 7a34463f9..b0101d9ea 100644
--- a/api/pheval/utils/exomiser/index.html
+++ b/api/pheval/utils/exomiser/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/utils/exomiser/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -978,14 +978,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.exomiser" class="md-nav__link">
-    src.pheval.utils.exomiser
+    exomiser
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.exomiser.semsim_to_exomiserdb" class="md-nav__link">
-    semsim_to_exomiserdb()
+    semsim_to_exomiserdb
   </a>
   
 </li>
@@ -1214,14 +1214,14 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.exomiser" class="md-nav__link">
-    src.pheval.utils.exomiser
+    exomiser
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.exomiser.semsim_to_exomiserdb" class="md-nav__link">
-    semsim_to_exomiserdb()
+    semsim_to_exomiserdb
   </a>
   
 </li>
@@ -1251,6 +1251,7 @@ <h1>Exomiser</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.utils.exomiser"></a>
   <div class="doc doc-contents first">
 
@@ -1266,12 +1267,13 @@ <h1>Exomiser</h1>
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.exomiser.semsim_to_exomiserdb" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">semsim_to_exomiserdb</span><span class="p">(</span><span class="n">input_path</span><span class="p">,</span> <span class="n">object_prefix</span><span class="p">,</span> <span class="n">subject_prefix</span><span class="p">,</span> <span class="n">db_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">semsim_to_exomiserdb</span><span class="p">(</span><span class="n">input_path</span><span class="p">,</span> <span class="n">object_prefix</span><span class="p">,</span> <span class="n">subject_prefix</span><span class="p">,</span> <span class="n">db_path</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1280,6 +1282,8 @@ <h2 id="src.pheval.utils.exomiser.semsim_to_exomiserdb" class="doc doc-heading">
   
       <p>ingests semsim file into exomiser phenotypic database</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1296,7 +1300,11 @@ <h2 id="src.pheval.utils.exomiser.semsim_to_exomiserdb" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>semsim input file. e.g. phenio-plus-hp-mp.0.semsimian.tsv</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1306,7 +1314,11 @@ <h2 id="src.pheval.utils.exomiser.semsim_to_exomiserdb" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>object prefix. e.g. MP</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>object prefix. e.g. MP</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1316,7 +1328,11 @@ <h2 id="src.pheval.utils.exomiser.semsim_to_exomiserdb" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>subject prefix e.g HP</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>subject prefix. e.g. HP</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1326,7 +1342,11 @@ <h2 id="src.pheval.utils.exomiser.semsim_to_exomiserdb" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/)</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/)</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1334,9 +1354,9 @@ <h2 id="src.pheval.utils.exomiser.semsim_to_exomiserdb" class="doc doc-heading">
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/exomiser.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 6</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/exomiser.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 6</span>
 <span class="normal"> 7</span>
 <span class="normal"> 8</span>
 <span class="normal"> 9</span>
@@ -1358,7 +1378,7 @@ <h2 id="src.pheval.utils.exomiser.semsim_to_exomiserdb" class="doc doc-heading">
     <span class="n">exomiserdb</span> <span class="o">=</span> <span class="n">ExomiserDB</span><span class="p">(</span><span class="n">db_path</span><span class="p">)</span>
     <span class="n">exomiserdb</span><span class="o">.</span><span class="n">import_from_semsim_file</span><span class="p">(</span><span class="n">input_path</span><span class="p">,</span> <span class="n">object_prefix</span><span class="p">,</span> <span class="n">subject_prefix</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/utils/file_utils/index.html b/api/pheval/utils/file_utils/index.html
index c7ef79e1d..783b9b8e5 100644
--- a/api/pheval/utils/file_utils/index.html
+++ b/api/pheval/utils/file_utils/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/utils/file_utils/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -992,56 +992,56 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils" class="md-nav__link">
-    src.pheval.utils.file_utils
+    file_utils
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.all_files" class="md-nav__link">
-    all_files()
+    all_files
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.ensure_columns_exists" class="md-nav__link">
-    ensure_columns_exists()
+    ensure_columns_exists
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.ensure_file_exists" class="md-nav__link">
-    ensure_file_exists()
+    ensure_file_exists
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.files_with_suffix" class="md-nav__link">
-    files_with_suffix()
+    files_with_suffix
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.is_gzipped" class="md-nav__link">
-    is_gzipped()
+    is_gzipped
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.normalise_file_name" class="md-nav__link">
-    normalise_file_name()
+    normalise_file_name
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.write_metadata" class="md-nav__link">
-    write_metadata()
+    write_metadata
   </a>
   
 </li>
@@ -1256,56 +1256,56 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils" class="md-nav__link">
-    src.pheval.utils.file_utils
+    file_utils
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.all_files" class="md-nav__link">
-    all_files()
+    all_files
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.ensure_columns_exists" class="md-nav__link">
-    ensure_columns_exists()
+    ensure_columns_exists
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.ensure_file_exists" class="md-nav__link">
-    ensure_file_exists()
+    ensure_file_exists
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.files_with_suffix" class="md-nav__link">
-    files_with_suffix()
+    files_with_suffix
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.is_gzipped" class="md-nav__link">
-    is_gzipped()
+    is_gzipped
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.normalise_file_name" class="md-nav__link">
-    normalise_file_name()
+    normalise_file_name
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.file_utils.write_metadata" class="md-nav__link">
-    write_metadata()
+    write_metadata
   </a>
   
 </li>
@@ -1335,6 +1335,7 @@ <h1>File utils</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.utils.file_utils"></a>
   <div class="doc doc-contents first">
 
@@ -1350,12 +1351,13 @@ <h1>File utils</h1>
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.file_utils.all_files" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">all_files</span><span class="p">(</span><span class="n">directory</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">all_files</span><span class="p">(</span><span class="n">directory</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1364,6 +1366,8 @@ <h2 id="src.pheval.utils.file_utils.all_files" class="doc doc-heading">
   
       <p>Obtains all files from a given directory.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1380,7 +1384,11 @@ <h2 id="src.pheval.utils.file_utils.all_files" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory path.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory path.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1388,6 +1396,8 @@ <h2 id="src.pheval.utils.file_utils.all_files" class="doc doc-heading">
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1401,14 +1411,18 @@ <h2 id="src.pheval.utils.file_utils.all_files" class="doc doc-heading">
           <td>
                 <code>list[<span title="pathlib.Path">Path</span>]</code>
           </td>
-          <td><p>list[Path]: A list of Path objects representing all files in the directory.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>list[Path]: A list of Path objects representing all files in the directory.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">31</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">31</span>
 <span class="normal">32</span>
 <span class="normal">33</span>
 <span class="normal">34</span>
@@ -1434,17 +1448,18 @@ <h2 id="src.pheval.utils.file_utils.all_files" class="doc doc-heading">
     <span class="n">files</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
     <span class="k">return</span> <span class="n">files</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.file_utils.ensure_columns_exists" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">ensure_columns_exists</span><span class="p">(</span><span class="n">cols</span><span class="p">,</span> <span class="n">dataframes</span><span class="p">,</span> <span class="n">err_message</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">ensure_columns_exists</span><span class="p">(</span><span class="n">cols</span><span class="p">,</span> <span class="n">dataframes</span><span class="p">,</span> <span class="n">err_message</span><span class="o">=</span><span class="s1">&#39;&#39;</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1460,9 +1475,9 @@ <h2 id="src.pheval.utils.file_utils.ensure_columns_exists" class="doc doc-headin
 )
 "</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 83</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 83</span>
 <span class="normal"> 84</span>
 <span class="normal"> 85</span>
 <span class="normal"> 86</span>
@@ -1508,46 +1523,31 @@ <h2 id="src.pheval.utils.file_utils.ensure_columns_exists" class="doc doc-headin
         <span class="k">if</span> <span class="ow">not</span> <span class="nb">all</span><span class="p">(</span><span class="n">x</span> <span class="ow">in</span> <span class="n">dataframe</span><span class="o">.</span><span class="n">columns</span> <span class="k">for</span> <span class="n">x</span> <span class="ow">in</span> <span class="n">flat_cols</span><span class="p">):</span>
             <span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="n">err_msg</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.file_utils.ensure_file_exists" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">ensure_file_exists</span><span class="p">(</span><span class="o">*</span><span class="n">files</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">ensure_file_exists</span><span class="p">(</span><span class="o">*</span><span class="n">files</span><span class="p">)</span></code>
 
 </h2>
 
 
   <div class="doc doc-contents ">
   
-      <p>Ensures the existence of files passed as parameter</p>
+      <p>Ensures the existence of files passed as parameter
+Raises:
+    FileNotFoundError: If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised</p>
 
-  <p><strong>Raises:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Type</th>
-        <th>Description</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td>
-                <code>FileNotFoundError</code>
-          </td>
-          <td><p>If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised</p></td>
-        </tr>
-    </tbody>
-  </table>
-
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">73</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">73</span>
 <span class="normal">74</span>
 <span class="normal">75</span>
 <span class="normal">76</span>
@@ -1563,17 +1563,18 @@ <h2 id="src.pheval.utils.file_utils.ensure_file_exists" class="doc doc-heading">
         <span class="k">if</span> <span class="ow">not</span> <span class="n">path</span><span class="o">.</span><span class="n">isfile</span><span class="p">(</span><span class="n">file</span><span class="p">):</span>
             <span class="k">raise</span> <span class="ne">FileNotFoundError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;File </span><span class="si">{</span><span class="n">file</span><span class="si">}</span><span class="s2"> not found&quot;</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.file_utils.files_with_suffix" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">files_with_suffix</span><span class="p">(</span><span class="n">directory</span><span class="p">,</span> <span class="n">suffix</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">files_with_suffix</span><span class="p">(</span><span class="n">directory</span><span class="p">,</span> <span class="n">suffix</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1582,6 +1583,8 @@ <h2 id="src.pheval.utils.file_utils.files_with_suffix" class="doc doc-heading">
   
       <p>Obtains all files ending in a specified suffix from a given directory.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1598,7 +1601,11 @@ <h2 id="src.pheval.utils.file_utils.files_with_suffix" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory path.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory path.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1608,7 +1615,11 @@ <h2 id="src.pheval.utils.file_utils.files_with_suffix" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>The specified suffix to filter files.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The specified suffix to filter files.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1616,6 +1627,8 @@ <h2 id="src.pheval.utils.file_utils.files_with_suffix" class="doc doc-heading">
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1629,14 +1642,18 @@ <h2 id="src.pheval.utils.file_utils.files_with_suffix" class="doc doc-heading">
           <td>
                 <code>list[<span title="pathlib.Path">Path</span>]</code>
           </td>
-          <td><p>list[Path]: A list of Path objects representing files with the specified suffix.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>list[Path]: A list of Path objects representing files with the specified suffix.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">15</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">15</span>
 <span class="normal">16</span>
 <span class="normal">17</span>
 <span class="normal">18</span>
@@ -1664,17 +1681,18 @@ <h2 id="src.pheval.utils.file_utils.files_with_suffix" class="doc doc-heading">
     <span class="n">files</span><span class="o">.</span><span class="n">sort</span><span class="p">()</span>
     <span class="k">return</span> <span class="n">files</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.file_utils.is_gzipped" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">is_gzipped</span><span class="p">(</span><span class="n">file_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">is_gzipped</span><span class="p">(</span><span class="n">file_path</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1683,6 +1701,8 @@ <h2 id="src.pheval.utils.file_utils.is_gzipped" class="doc doc-heading">
   
       <p>Confirms whether a file is gzipped.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1699,7 +1719,11 @@ <h2 id="src.pheval.utils.file_utils.is_gzipped" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1707,6 +1731,8 @@ <h2 id="src.pheval.utils.file_utils.is_gzipped" class="doc doc-heading">
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1720,14 +1746,18 @@ <h2 id="src.pheval.utils.file_utils.is_gzipped" class="doc doc-heading">
 <td><code>bool</code></td>          <td>
                 <code>bool</code>
           </td>
-          <td><p>True if the file is gzipped, False otherwise.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>True if the file is gzipped, False otherwise.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">46</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">46</span>
 <span class="normal">47</span>
 <span class="normal">48</span>
 <span class="normal">49</span>
@@ -1749,17 +1779,18 @@ <h2 id="src.pheval.utils.file_utils.is_gzipped" class="doc doc-heading">
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="n">file_path</span><span class="o">.</span><span class="n">name</span><span class="o">.</span><span class="n">endswith</span><span class="p">(</span><span class="s2">&quot;.gz&quot;</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.file_utils.normalise_file_name" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">normalise_file_name</span><span class="p">(</span><span class="n">file_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">normalise_file_name</span><span class="p">(</span><span class="n">file_path</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1768,6 +1799,8 @@ <h2 id="src.pheval.utils.file_utils.normalise_file_name" class="doc doc-heading"
   
       <p>Normalises the file name by removing diacritical marks (accents) from Unicode characters.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1784,7 +1817,11 @@ <h2 id="src.pheval.utils.file_utils.normalise_file_name" class="doc doc-heading"
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1792,6 +1829,8 @@ <h2 id="src.pheval.utils.file_utils.normalise_file_name" class="doc doc-heading"
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1805,14 +1844,18 @@ <h2 id="src.pheval.utils.file_utils.normalise_file_name" class="doc doc-heading"
 <td><code>str</code></td>          <td>
                 <code>str</code>
           </td>
-          <td><p>The normalised file name without diacritical marks.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The normalised file name without diacritical marks.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">59</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">59</span>
 <span class="normal">60</span>
 <span class="normal">61</span>
 <span class="normal">62</span>
@@ -1836,17 +1879,18 @@ <h2 id="src.pheval.utils.file_utils.normalise_file_name" class="doc doc-heading"
     <span class="n">normalised_file_name</span> <span class="o">=</span> <span class="n">unicodedata</span><span class="o">.</span><span class="n">normalize</span><span class="p">(</span><span class="s2">&quot;NFD&quot;</span><span class="p">,</span> <span class="nb">str</span><span class="p">(</span><span class="n">file_path</span><span class="p">))</span>
     <span class="k">return</span> <span class="n">re</span><span class="o">.</span><span class="n">sub</span><span class="p">(</span><span class="s2">&quot;[</span><span class="se">\u0300</span><span class="s2">-</span><span class="se">\u036f</span><span class="s2">]&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">,</span> <span class="n">normalised_file_name</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.file_utils.write_metadata" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">write_metadata</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">meta_data</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">write_metadata</span><span class="p">(</span><span class="n">output_dir</span><span class="p">,</span> <span class="n">meta_data</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1855,6 +1899,8 @@ <h2 id="src.pheval.utils.file_utils.write_metadata" class="doc doc-heading">
   
       <p>Write the metadata for a run to a YAML file.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1871,7 +1917,11 @@ <h2 id="src.pheval.utils.file_utils.write_metadata" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory where the metadata file will be saved.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory where the metadata file will be saved.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1881,7 +1931,11 @@ <h2 id="src.pheval.utils.file_utils.write_metadata" class="doc doc-heading">
           <td>
                 <code><span title="pheval.run_metadata.BasicOutputRunMetaData">BasicOutputRunMetaData</span></code>
           </td>
-          <td><p>The metadata to be written.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The metadata to be written.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1889,9 +1943,9 @@ <h2 id="src.pheval.utils.file_utils.write_metadata" class="doc doc-heading">
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">108</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/file_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">108</span>
 <span class="normal">109</span>
 <span class="normal">110</span>
 <span class="normal">111</span>
@@ -1913,7 +1967,7 @@ <h2 id="src.pheval.utils.file_utils.write_metadata" class="doc doc-heading">
         <span class="n">yaml</span><span class="o">.</span><span class="n">dump</span><span class="p">(</span><span class="n">to_dict</span><span class="p">(</span><span class="n">meta_data</span><span class="p">),</span> <span class="n">metadata_file</span><span class="p">,</span> <span class="n">sort_keys</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">default_style</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">)</span>
     <span class="n">metadata_file</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/utils/phenopacket_utils/index.html b/api/pheval/utils/phenopacket_utils/index.html
index 1f7e13f75..f97621bfc 100644
--- a/api/pheval/utils/phenopacket_utils/index.html
+++ b/api/pheval/utils/phenopacket_utils/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/utils/phenopacket_utils/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -1006,7 +1006,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils" class="md-nav__link">
-    src.pheval.utils.phenopacket_utils
+    phenopacket_utils
   </a>
   
 </li>
@@ -1021,28 +1021,28 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier" class="md-nav__link">
-    find_identifier()
+    find_identifier
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_symbol_from_identifier" class="md-nav__link">
-    obtain_gene_symbol_from_identifier()
+    obtain_gene_symbol_from_identifier
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_interpretations_gene_identifier" class="md-nav__link">
-    update_genomic_interpretations_gene_identifier()
+    update_genomic_interpretations_gene_identifier
   </a>
   
 </li>
@@ -1069,7 +1069,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
@@ -1089,28 +1089,28 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_hpo" class="md-nav__link">
-    add_randomised_hpo()
+    add_randomised_hpo
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_spiked_vcf_path" class="md-nav__link">
-    add_spiked_vcf_path()
+    add_spiked_vcf_path
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpretations" class="md-nav__link">
-    update_interpretations()
+    update_interpretations
   </a>
   
 </li>
@@ -1130,112 +1130,112 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.causative_variants" class="md-nav__link">
-    causative_variants()
+    causative_variants
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_disease_record" class="md-nav__link">
-    check_incomplete_disease_record()
+    check_incomplete_disease_record
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_gene_record" class="md-nav__link">
-    check_incomplete_gene_record()
+    check_incomplete_gene_record
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_variant_record" class="md-nav__link">
-    check_incomplete_variant_record()
+    check_incomplete_variant_record
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_genes" class="md-nav__link">
-    diagnosed_genes()
+    diagnosed_genes
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_variants" class="md-nav__link">
-    diagnosed_variants()
+    diagnosed_variants
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnoses" class="md-nav__link">
-    diagnoses()
+    diagnoses
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diseases" class="md-nav__link">
-    diseases()
+    diseases
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.files" class="md-nav__link">
-    files()
+    files
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.interpretations" class="md-nav__link">
-    interpretations()
+    interpretations
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.negated_phenotypic_features" class="md-nav__link">
-    negated_phenotypic_features()
+    negated_phenotypic_features
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.observed_phenotypic_features" class="md-nav__link">
-    observed_phenotypic_features()
+    observed_phenotypic_features
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.phenotypic_features" class="md-nav__link">
-    phenotypic_features()
+    phenotypic_features
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.sample_id" class="md-nav__link">
-    sample_id()
+    sample_id
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class="md-nav__link">
-    vcf_file_data()
+    vcf_file_data
   </a>
   
 </li>
@@ -1268,42 +1268,42 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.create_gene_identifier_map" class="md-nav__link">
-    create_gene_identifier_map()
+    create_gene_identifier_map
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.create_hgnc_dict" class="md-nav__link">
-    create_hgnc_dict()
+    create_hgnc_dict
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.create_json_message" class="md-nav__link">
-    create_json_message()
+    create_json_message
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.phenopacket_reader" class="md-nav__link">
-    phenopacket_reader()
+    phenopacket_reader
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.read_hgnc_data" class="md-nav__link">
-    read_hgnc_data()
+    read_hgnc_data
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.write_phenopacket" class="md-nav__link">
-    write_phenopacket()
+    write_phenopacket
   </a>
   
 </li>
@@ -1504,7 +1504,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils" class="md-nav__link">
-    src.pheval.utils.phenopacket_utils
+    phenopacket_utils
   </a>
   
 </li>
@@ -1519,28 +1519,28 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier" class="md-nav__link">
-    find_identifier()
+    find_identifier
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_symbol_from_identifier" class="md-nav__link">
-    obtain_gene_symbol_from_identifier()
+    obtain_gene_symbol_from_identifier
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_interpretations_gene_identifier" class="md-nav__link">
-    update_genomic_interpretations_gene_identifier()
+    update_genomic_interpretations_gene_identifier
   </a>
   
 </li>
@@ -1567,7 +1567,7 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
@@ -1587,28 +1587,28 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_hpo" class="md-nav__link">
-    add_randomised_hpo()
+    add_randomised_hpo
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_spiked_vcf_path" class="md-nav__link">
-    add_spiked_vcf_path()
+    add_spiked_vcf_path
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpretations" class="md-nav__link">
-    update_interpretations()
+    update_interpretations
   </a>
   
 </li>
@@ -1628,112 +1628,112 @@
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.__init__" class="md-nav__link">
-    __init__()
+    __init__
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.causative_variants" class="md-nav__link">
-    causative_variants()
+    causative_variants
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_disease_record" class="md-nav__link">
-    check_incomplete_disease_record()
+    check_incomplete_disease_record
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_gene_record" class="md-nav__link">
-    check_incomplete_gene_record()
+    check_incomplete_gene_record
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_variant_record" class="md-nav__link">
-    check_incomplete_variant_record()
+    check_incomplete_variant_record
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_genes" class="md-nav__link">
-    diagnosed_genes()
+    diagnosed_genes
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_variants" class="md-nav__link">
-    diagnosed_variants()
+    diagnosed_variants
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnoses" class="md-nav__link">
-    diagnoses()
+    diagnoses
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diseases" class="md-nav__link">
-    diseases()
+    diseases
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.files" class="md-nav__link">
-    files()
+    files
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.interpretations" class="md-nav__link">
-    interpretations()
+    interpretations
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.negated_phenotypic_features" class="md-nav__link">
-    negated_phenotypic_features()
+    negated_phenotypic_features
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.observed_phenotypic_features" class="md-nav__link">
-    observed_phenotypic_features()
+    observed_phenotypic_features
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.phenotypic_features" class="md-nav__link">
-    phenotypic_features()
+    phenotypic_features
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.sample_id" class="md-nav__link">
-    sample_id()
+    sample_id
   </a>
   
 </li>
         
           <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class="md-nav__link">
-    vcf_file_data()
+    vcf_file_data
   </a>
   
 </li>
@@ -1766,42 +1766,42 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.create_gene_identifier_map" class="md-nav__link">
-    create_gene_identifier_map()
+    create_gene_identifier_map
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.create_hgnc_dict" class="md-nav__link">
-    create_hgnc_dict()
+    create_hgnc_dict
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.create_json_message" class="md-nav__link">
-    create_json_message()
+    create_json_message
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.phenopacket_reader" class="md-nav__link">
-    phenopacket_reader()
+    phenopacket_reader
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.read_hgnc_data" class="md-nav__link">
-    read_hgnc_data()
+    read_hgnc_data
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.write_phenopacket" class="md-nav__link">
-    write_phenopacket()
+    write_phenopacket
   </a>
   
 </li>
@@ -1831,6 +1831,7 @@ <h1>Phenopacket utils</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.utils.phenopacket_utils"></a>
   <div class="doc doc-contents first">
 
@@ -1850,7 +1851,7 @@ <h1>Phenopacket utils</h1>
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater" class="doc doc-heading">
-        <code>GeneIdentifierUpdater</code>
+          <code>GeneIdentifierUpdater</code>
 
 
 </h2>
@@ -1861,10 +1862,9 @@ <h2 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater" class="doc doc
   
       <p>Class for updating gene identifiers within genomic interpretations.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">638</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">638</span>
 <span class="normal">639</span>
 <span class="normal">640</span>
 <span class="normal">641</span>
@@ -2080,7 +2080,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater" class="doc doc
                 <span class="p">)</span>
         <span class="k">return</span> <span class="n">updated_interpretations</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2094,12 +2094,13 @@ <h2 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater" class="doc doc
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">gene_identifier</span><span class="p">,</span> <span class="n">hgnc_data</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">identifier_map</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">gene_identifier</span><span class="p">,</span> <span class="n">hgnc_data</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">identifier_map</span><span class="o">=</span><span class="kc">None</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2108,6 +2109,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__" class
   
       <p>Initialise the GeneIdentifierUpdater.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2124,7 +2127,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__" class
           <td>
                 <code>str</code>
           </td>
-          <td><p>The gene identifier to update to.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The gene identifier to update to.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2134,7 +2141,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__" class
           <td>
                 <code>dict</code>
           </td>
-          <td><p>A dictionary containing HGNC data (default: None).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A dictionary containing HGNC data (default: None).</p>
+            </div>
+          </td>
           <td>
                 <code>None</code>
           </td>
@@ -2144,7 +2155,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__" class
           <td>
                 <code>dict</code>
           </td>
-          <td><p>A dictionary mapping gene identifiers (default: None).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A dictionary mapping gene identifiers (default: None).</p>
+            </div>
+          </td>
           <td>
                 <code>None</code>
           </td>
@@ -2152,9 +2167,9 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__" class
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">641</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">641</span>
 <span class="normal">642</span>
 <span class="normal">643</span>
 <span class="normal">644</span>
@@ -2180,17 +2195,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__" class
     <span class="bp">self</span><span class="o">.</span><span class="n">gene_identifier</span> <span class="o">=</span> <span class="n">gene_identifier</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">identifier_map</span> <span class="o">=</span> <span class="n">identifier_map</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">find_identifier</span><span class="p">(</span><span class="n">gene_symbol</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">find_identifier</span><span class="p">(</span><span class="n">gene_symbol</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2199,6 +2215,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier
   
       <p>Find the specified gene identifier for a gene symbol.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2215,7 +2233,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier
           <td>
                 <code>str</code>
           </td>
-          <td><p>The gene symbol to find the identifier for.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The gene symbol to find the identifier for.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2223,6 +2245,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2236,14 +2260,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier
 <td><code>str</code></td>          <td>
                 <code>str</code>
           </td>
-          <td><p>The identified gene identifier.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The identified gene identifier.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">655</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">655</span>
 <span class="normal">656</span>
 <span class="normal">657</span>
 <span class="normal">658</span>
@@ -2277,17 +2305,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier
                 <span class="k">if</span> <span class="n">prev_symbol</span> <span class="o">==</span> <span class="n">gene_symbol</span><span class="p">:</span>
                     <span class="k">return</span> <span class="n">data</span><span class="p">[</span><span class="bp">self</span><span class="o">.</span><span class="n">gene_identifier</span><span class="p">]</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_symbol_from_identifier" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">obtain_gene_symbol_from_identifier</span><span class="p">(</span><span class="n">query_gene_identifier</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">obtain_gene_symbol_from_identifier</span><span class="p">(</span><span class="n">query_gene_identifier</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2296,6 +2325,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_sym
   
       <p>Obtain gene symbol from a gene identifier.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2312,7 +2343,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_sym
           <td>
                 <code>str</code>
           </td>
-          <td><p>The gene identifier.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The gene identifier.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2320,6 +2355,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_sym
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2333,14 +2370,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_sym
 <td><code>str</code></td>          <td>
                 <code>str</code>
           </td>
-          <td><p>The gene symbol corresponding to the identifier.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The gene symbol corresponding to the identifier.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">673</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">673</span>
 <span class="normal">674</span>
 <span class="normal">675</span>
 <span class="normal">676</span>
@@ -2362,17 +2403,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_sym
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">identifier_map</span><span class="p">[</span><span class="n">query_gene_identifier</span><span class="p">]</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_interpretations_gene_identifier" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">update_genomic_interpretations_gene_identifier</span><span class="p">(</span><span class="n">interpretations</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">update_genomic_interpretations_gene_identifier</span><span class="p">(</span><span class="n">interpretations</span><span class="p">,</span> <span class="n">phenopacket_path</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2381,6 +2423,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_
   
       <p>Update the genomic interpretations of a Phenopacket.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2397,7 +2441,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.Interpretation">Interpretation</span>]</code>
           </td>
-          <td><p>List of Interpretation objects.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List of Interpretation objects.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2405,6 +2453,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -2418,14 +2468,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.Interpretation">Interpretation</span>]</code>
           </td>
-          <td><p>List[Interpretation]: Updated list of Interpretation objects.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[Interpretation]: Updated list of Interpretation objects.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">713</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">713</span>
 <span class="normal">714</span>
 <span class="normal">715</span>
 <span class="normal">716</span>
@@ -2491,7 +2545,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_
             <span class="p">)</span>
     <span class="k">return</span> <span class="n">updated_interpretations</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2502,6 +2556,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -2509,7 +2564,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-heading">
-        <code>GenomicVariant</code>
+          <code>GenomicVariant</code>
 
   
   <span class="doc doc-labels">
@@ -2524,6 +2579,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-headin
   
       <p>Represents a genomic variant.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2540,7 +2597,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-headin
           <td>
                 <code>str</code>
           </td>
-          <td><p>The chromosome position of the variant recommended to be provided in the following format.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The chromosome position of the variant recommended to be provided in the following format.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2550,7 +2611,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-headin
           <td>
                 <code>int</code>
           </td>
-          <td><p>Position of the variant following VCF convention.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Position of the variant following VCF convention.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2560,7 +2625,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-headin
           <td>
                 <code>str</code>
           </td>
-          <td><p>Reference allele following VCF convention.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Reference allele following VCF convention.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2570,7 +2639,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-headin
           <td>
                 <code>str</code>
           </td>
-          <td><p>Alternate allele following VCF convention.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Alternate allele following VCF convention.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2578,10 +2651,9 @@ <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-headin
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">48</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">48</span>
 <span class="normal">49</span>
 <span class="normal">50</span>
 <span class="normal">51</span>
@@ -2617,7 +2689,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-headin
     <span class="n">ref</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">alt</span><span class="p">:</span> <span class="nb">str</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2637,6 +2709,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-headin
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -2644,23 +2717,22 @@ <h2 id="src.pheval.utils.phenopacket_utils.GenomicVariant" class="doc doc-headin
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError" class="doc doc-heading">
-        <code>IncompatibleGenomeAssemblyError</code>
+          <code>IncompatibleGenomeAssemblyError</code>
 
 
 </h2>
 
 
   <div class="doc doc-contents ">
-      <p class="doc doc-class-bases">
-        Bases: <code>Exception</code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code>Exception</code></p>
 
   
       <p>Exception raised for incompatible genome assembly.</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">27</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">27</span>
 <span class="normal">28</span>
 <span class="normal">29</span>
 <span class="normal">30</span>
@@ -2698,7 +2770,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError" clas
     <span class="k">def</span> <span class="fm">__str__</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
         <span class="k">return</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">message</span><span class="si">}</span><span class="s2"> -&gt; </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">assembly</span><span class="si">}</span><span class="s2"> in </span><span class="si">{</span><span class="bp">self</span><span class="o">.</span><span class="n">phenopacket</span><span class="si">}</span><span class="s2">&quot;</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2712,12 +2784,13 @@ <h2 id="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError" clas
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">assembly</span><span class="p">,</span> <span class="n">phenopacket</span><span class="p">,</span> <span class="n">message</span><span class="o">=</span><span class="s1">&#39;Incompatible Genome Assembly&#39;</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">assembly</span><span class="p">,</span> <span class="n">phenopacket</span><span class="p">,</span> <span class="n">message</span><span class="o">=</span><span class="s1">&#39;Incompatible Genome Assembly&#39;</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2726,6 +2799,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__ini
   
       <p>Initialise IncompatibleGenomeAssemblyError.</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -2737,32 +2812,44 @@ <h3 id="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__ini
     </thead>
     <tbody>
         <tr>
-          <td><code>assembly</code></td>
+          <td><code><span title="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__init__.assembly">assembly</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Incompatible genome assembly encountered.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Incompatible genome assembly encountered.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>phenopacket</code></td>
+          <td><code><span title="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__init__.phenopacket">phenopacket</span></code></td>
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the Phenopacket associated with the error.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the Phenopacket associated with the error.</p>
+            </div>
+          </td>
         </tr>
         <tr>
-          <td><code>message</code></td>
+          <td><code><span title="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__init__.message">message</span></code></td>
           <td>
                 <code>str</code>
           </td>
-          <td><p>Custom error message (default is "Incompatible Genome Assembly").</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Custom error message (default is "Incompatible Genome Assembly").</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">30</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">30</span>
 <span class="normal">31</span>
 <span class="normal">32</span>
 <span class="normal">33</span>
@@ -2788,7 +2875,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__ini
     <span class="bp">self</span><span class="o">.</span><span class="n">message</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="n">message</span>
     <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">message</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -2799,6 +2886,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__ini
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -2806,7 +2894,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__ini
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder" class="doc doc-heading">
-        <code>PhenopacketRebuilder</code>
+          <code>PhenopacketRebuilder</code>
 
 
 </h2>
@@ -2817,10 +2905,9 @@ <h2 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder" class="doc doc-
   
       <p>Class for rebuilding a Phenopacket</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">537</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">537</span>
 <span class="normal">538</span>
 <span class="normal">539</span>
 <span class="normal">540</span>
@@ -2958,7 +3045,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder" class="doc doc-
         <span class="n">phenopacket</span><span class="o">.</span><span class="n">files</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">phenopacket_files</span><span class="p">)</span>
         <span class="k">return</span> <span class="n">phenopacket</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2972,12 +3059,13 @@ <h2 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder" class="doc doc-
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">)</span></code>
 
 </h3>
 
@@ -2986,6 +3074,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.__init__" class=
   
       <p>Initialise PhenopacketUtil</p>
 
+
+
   <p><strong>Attributes:</strong></p>
   <table>
     <thead>
@@ -2997,18 +3087,22 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.__init__" class=
     </thead>
     <tbody>
         <tr>
-          <td><code>phenopacket</code></td>
+          <td><code><span title="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.__init__.phenopacket">phenopacket</span></code></td>
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>Phenopacket or Family object</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Phenopacket or Family object</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">540</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">540</span>
 <span class="normal">541</span>
 <span class="normal">542</span>
 <span class="normal">543</span>
@@ -3022,17 +3116,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.__init__" class=
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">phenopacket</span> <span class="o">=</span> <span class="n">phenopacket</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_hpo" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">add_randomised_hpo</span><span class="p">(</span><span class="n">randomised_hpo</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">add_randomised_hpo</span><span class="p">(</span><span class="n">randomised_hpo</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3041,6 +3136,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_h
   
       <p>Add randomised phenotypic profiles to a Phenopacket or Family.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3057,7 +3154,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_h
           <td>
                 <code>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>The randomised phenotypic profiles to be added.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The randomised phenotypic profiles to be added.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3065,6 +3166,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_h
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3078,14 +3181,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_h
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Union[Phenopacket, Family]: The Phenopacket or Family object with added randomised profiles.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">569</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">569</span>
 <span class="normal">570</span>
 <span class="normal">571</span>
 <span class="normal">572</span>
@@ -3121,17 +3228,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_h
         <span class="n">phenopacket</span><span class="o">.</span><span class="n">phenotypic_features</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">randomised_hpo</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">phenopacket</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_spiked_vcf_path" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">add_spiked_vcf_path</span><span class="p">(</span><span class="n">spiked_vcf_file_data</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">add_spiked_vcf_path</span><span class="p">(</span><span class="n">spiked_vcf_file_data</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3139,16 +3247,14 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_spiked_vcf_p
   <div class="doc doc-contents ">
   
       <p>Add a spiked VCF path to a Phenopacket or Family.</p>
-      <ul>
-<li>spiked_vcf_file_data (File): The VCF file data to be added.</li>
-</ul>
-      <ul>
-<li>Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path.</li>
-</ul>
-
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">588</span>
+<p>Args:</p>
+<ul><li>spiked_vcf_file_data (File): The VCF file data to be added.</li></ul>
+<p>Returns:</p>
+<ul><li>Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path.</li></ul>
+
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">588</span>
 <span class="normal">589</span>
 <span class="normal">590</span>
 <span class="normal">591</span>
@@ -3184,17 +3290,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_spiked_vcf_p
     <span class="n">phenopacket</span><span class="o">.</span><span class="n">files</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">phenopacket_files</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">phenopacket</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpretations" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">update_interpretations</span><span class="p">(</span><span class="n">interpretations</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">update_interpretations</span><span class="p">(</span><span class="n">interpretations</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3203,6 +3310,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpret
   
       <p>Add the updated interpretations to a Phenopacket or Family.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3219,7 +3328,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpret
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.Interpretation">Interpretation</span>]</code>
           </td>
-          <td><p>The updated interpretations to be added.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The updated interpretations to be added.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -3227,6 +3340,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpret
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -3240,14 +3355,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpret
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">548</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">548</span>
 <span class="normal">549</span>
 <span class="normal">550</span>
 <span class="normal">551</span>
@@ -3287,7 +3406,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpret
         <span class="n">phenopacket</span><span class="o">.</span><span class="n">interpretations</span><span class="o">.</span><span class="n">extend</span><span class="p">(</span><span class="n">interpretations</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">phenopacket</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -3298,6 +3417,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpret
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -3305,7 +3425,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpret
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil" class="doc doc-heading">
-        <code>PhenopacketUtil</code>
+          <code>PhenopacketUtil</code>
 
 
 </h2>
@@ -3316,10 +3436,9 @@ <h2 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil" class="doc doc-headi
   
       <p>Class for retrieving data from a Phenopacket or Family object</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">219</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">219</span>
 <span class="normal">220</span>
 <span class="normal">221</span>
 <span class="normal">222</span>
@@ -3951,7 +4070,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil" class="doc doc-headi
             <span class="k">return</span> <span class="kc">True</span>
         <span class="k">return</span> <span class="kc">False</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -3965,12 +4084,13 @@ <h2 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil" class="doc doc-headi
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.__init__" class="doc doc-heading">
-<code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket_contents</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="fm">__init__</span><span class="p">(</span><span class="n">phenopacket_contents</span><span class="p">)</span></code>
 
 </h3>
 
@@ -3979,6 +4099,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.__init__" class="doc
   
       <p>Initialise PhenopacketUtil</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -3995,7 +4117,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.__init__" class="doc
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>Phenopacket or Family object</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Phenopacket or Family object</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -4003,9 +4129,9 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.__init__" class="doc
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">222</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">222</span>
 <span class="normal">223</span>
 <span class="normal">224</span>
 <span class="normal">225</span>
@@ -4019,17 +4145,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.__init__" class="doc
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">phenopacket_contents</span> <span class="o">=</span> <span class="n">phenopacket_contents</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.causative_variants" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">causative_variants</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">causative_variants</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4038,6 +4165,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.causative_variants" c
   
       <p>Retrieve a list of causative variants listed in a Phenopacket</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4051,14 +4180,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.causative_variants" c
           <td>
                 <code><span title="typing.List">List</span>[<a class="autorefs autorefs-internal" title="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" href="#src.pheval.utils.phenopacket_utils.ProbandCausativeVariant">ProbandCausativeVariant</a>]</code>
           </td>
-          <td><p>List[ProbandCausativeVariant]: List of proband causative variants</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[ProbandCausativeVariant]: List of proband causative variants</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">352</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">352</span>
 <span class="normal">353</span>
 <span class="normal">354</span>
 <span class="normal">355</span>
@@ -4112,17 +4245,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.causative_variants" c
             <span class="n">all_variants</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">variant_data</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">all_variants</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_disease_record" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">check_incomplete_disease_record</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">check_incomplete_disease_record</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4133,6 +4267,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_dise
 <p>This method iterates through the diagnosed disease records and checks if any of them
 have missing or incomplete information such as empty disease name, or disease identifier.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4146,14 +4282,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_dise
 <td><code>bool</code></td>          <td>
                 <code>bool</code>
           </td>
-          <td><p>True if any disease record is incomplete, False otherwise.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>True if any disease record is incomplete, False otherwise.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">522</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">522</span>
 <span class="normal">523</span>
 <span class="normal">524</span>
 <span class="normal">525</span>
@@ -4179,17 +4319,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_dise
         <span class="k">return</span> <span class="kc">True</span>
     <span class="k">return</span> <span class="kc">False</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_gene_record" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">check_incomplete_gene_record</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">check_incomplete_gene_record</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4200,6 +4341,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_gene
 <p>This method iterates through the diagnosed gene records and checks if any of them
 have missing or incomplete information such as gene name, or gene identifier.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4213,14 +4356,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_gene
 <td><code>bool</code></td>          <td>
                 <code>bool</code>
           </td>
-          <td><p>True if any gene record is incomplete, False otherwise.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>True if any gene record is incomplete, False otherwise.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">506</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">506</span>
 <span class="normal">507</span>
 <span class="normal">508</span>
 <span class="normal">509</span>
@@ -4250,17 +4397,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_gene
             <span class="k">return</span> <span class="kc">True</span>
     <span class="k">return</span> <span class="kc">False</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_variant_record" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">check_incomplete_variant_record</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">check_incomplete_variant_record</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4272,6 +4420,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_vari
 have missing or incomplete information such as empty chromosome, position, reference,
 or alternate allele.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4285,14 +4435,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_vari
 <td><code>bool</code></td>          <td>
                 <code>bool</code>
           </td>
-          <td><p>True if any variant record is incomplete, False otherwise.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>True if any variant record is incomplete, False otherwise.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">483</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">483</span>
 <span class="normal">484</span>
 <span class="normal">485</span>
 <span class="normal">486</span>
@@ -4336,46 +4490,31 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_vari
             <span class="k">return</span> <span class="kc">True</span>
     <span class="k">return</span> <span class="kc">False</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_genes" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">diagnosed_genes</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">diagnosed_genes</span><span class="p">()</span></code>
 
 </h3>
 
 
   <div class="doc doc-contents ">
   
-      <p>Retrieve the disease causing genes from a phenopacket.</p>
-
-  <p><strong>Returns:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Type</th>
-        <th>Description</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td>
-                <code><span title="typing.List">List</span>[<a class="autorefs autorefs-internal" title="src.pheval.utils.phenopacket_utils.ProbandCausativeGene" href="#src.pheval.utils.phenopacket_utils.ProbandCausativeGene">ProbandCausativeGene</a>]</code>
-          </td>
-          <td><p>List[ProbandCausativeGene]: List of causative genes</p></td>
-        </tr>
-    </tbody>
-  </table>
+      <p>Retrieve the disease causing genes from a phenopacket.</p>
+      <p><strong>Returns:</strong></p>
+      <p>List[ProbandCausativeGene]: List of causative genes</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">446</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">446</span>
 <span class="normal">447</span>
 <span class="normal">448</span>
 <span class="normal">449</span>
@@ -4401,46 +4540,31 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_genes" clas
             <span class="n">genes</span> <span class="o">=</span> <span class="nb">list</span><span class="p">({</span><span class="n">gene</span><span class="o">.</span><span class="n">gene_symbol</span><span class="p">:</span> <span class="n">gene</span> <span class="k">for</span> <span class="n">gene</span> <span class="ow">in</span> <span class="n">genes</span><span class="p">}</span><span class="o">.</span><span class="n">values</span><span class="p">())</span>
     <span class="k">return</span> <span class="n">genes</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_variants" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">diagnosed_variants</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">diagnosed_variants</span><span class="p">()</span></code>
 
 </h3>
 
 
   <div class="doc doc-contents ">
   
-      <p>Retrieve a list of all known causative variants from a phenopacket.</p>
-
-  <p><strong>Returns:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Type</th>
-        <th>Description</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td>
-                <code><span title="typing.List">List</span>[<a class="autorefs autorefs-internal" title="src.pheval.utils.phenopacket_utils.GenomicVariant" href="#src.pheval.utils.phenopacket_utils.GenomicVariant">GenomicVariant</a>]</code>
-          </td>
-          <td><p>List[GenomicVariant]: List of causative variants</p></td>
-        </tr>
-    </tbody>
-  </table>
+      <p>Retrieve a list of all known causative variants from a phenopacket.
+Returns:
+    List[GenomicVariant]: List of causative variants</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">460</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">460</span>
 <span class="normal">461</span>
 <span class="normal">462</span>
 <span class="normal">463</span>
@@ -4484,17 +4608,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_variants" c
             <span class="n">variants</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">variant</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">variants</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnoses" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">diagnoses</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">diagnoses</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4503,6 +4628,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnoses" class="doc
   
       <p>Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4516,14 +4643,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnoses" class="doc
           <td>
                 <code><span title="typing.List">List</span>[<a class="autorefs autorefs-internal" title="src.pheval.utils.phenopacket_utils.ProbandDisease" href="#src.pheval.utils.phenopacket_utils.ProbandDisease">ProbandDisease</a>]</code>
           </td>
-          <td><p>List[ProbandDisease]: List of diagnosed diseases</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[ProbandDisease]: List of diagnosed diseases</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">331</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">331</span>
 <span class="normal">332</span>
 <span class="normal">333</span>
 <span class="normal">334</span>
@@ -4539,17 +4670,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnoses" class="doc
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="nb">list</span><span class="p">(</span><span class="nb">set</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">_diagnosis_from_interpretations</span><span class="p">()</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">_diagnosis_from_disease</span><span class="p">()))</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diseases" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">diseases</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">diseases</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4558,6 +4690,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diseases" class="doc
   
       <p>Retrieve a list of Diseases associated with the proband</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4571,14 +4705,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diseases" class="doc
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.Disease">Disease</span>]</code>
           </td>
-          <td><p>List[Disease]: List of diseases</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[Disease]: List of diseases</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">283</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">283</span>
 <span class="normal">284</span>
 <span class="normal">285</span>
 <span class="normal">286</span>
@@ -4600,17 +4738,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.diseases" class="doc
     <span class="k">else</span><span class="p">:</span>
         <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">phenopacket_contents</span><span class="o">.</span><span class="n">diseases</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.files" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">files</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">files</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4619,6 +4758,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.files" class="doc doc
   
       <p>Retrieve a list of files associated with a phenopacket</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4632,14 +4773,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.files" class="doc doc
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.File">File</span>]</code>
           </td>
-          <td><p>List[File]: List of files associated with a phenopacket</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[File]: List of files associated with a phenopacket</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">380</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">380</span>
 <span class="normal">381</span>
 <span class="normal">382</span>
 <span class="normal">383</span>
@@ -4655,17 +4800,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.files" class="doc doc
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">phenopacket_contents</span><span class="o">.</span><span class="n">files</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.interpretations" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">interpretations</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">interpretations</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4674,6 +4820,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.interpretations" clas
   
       <p>Retrieve a list of interpretations from a Phenopacket</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4687,14 +4835,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.interpretations" clas
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.Interpretation">Interpretation</span>]</code>
           </td>
-          <td><p>List[Interpretation]: List of interpretations</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[Interpretation]: List of interpretations</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">340</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">340</span>
 <span class="normal">341</span>
 <span class="normal">342</span>
 <span class="normal">343</span>
@@ -4716,17 +4868,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.interpretations" clas
     <span class="k">else</span><span class="p">:</span>
         <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">phenopacket_contents</span><span class="o">.</span><span class="n">interpretations</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.negated_phenotypic_features" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">negated_phenotypic_features</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">negated_phenotypic_features</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4735,6 +4888,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.negated_phenotypic_fe
   
       <p>Retrieve a list of all negated HPO terms</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4748,14 +4903,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.negated_phenotypic_fe
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List[PhenotypicFeature]: List of negated HPO terms</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[PhenotypicFeature]: List of negated HPO terms</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">269</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">269</span>
 <span class="normal">270</span>
 <span class="normal">271</span>
 <span class="normal">272</span>
@@ -4781,17 +4940,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.negated_phenotypic_fe
             <span class="n">negated_phenotypic_features</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">negated_phenotypic_features</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.observed_phenotypic_features" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">observed_phenotypic_features</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">observed_phenotypic_features</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4800,6 +4960,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.observed_phenotypic_f
   
       <p>Retrieve a list of all observed HPO terms</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4813,14 +4975,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.observed_phenotypic_f
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List[PhenotypicFeature]: List of observed HPO terms</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[PhenotypicFeature]: List of observed HPO terms</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">254</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">254</span>
 <span class="normal">255</span>
 <span class="normal">256</span>
 <span class="normal">257</span>
@@ -4848,17 +5014,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.observed_phenotypic_f
         <span class="n">phenotypic_features</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">p</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">phenotypic_features</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.phenotypic_features" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">phenotypic_features</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">phenotypic_features</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4867,6 +5034,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.phenotypic_features"
   
       <p>Retrieve a list of all HPO terms</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4880,14 +5049,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.phenotypic_features"
           <td>
                 <code><span title="typing.List">List</span>[<span title="phenopackets.PhenotypicFeature">PhenotypicFeature</span>]</code>
           </td>
-          <td><p>List[PhenotypicFeature]: List of HPO terms</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>List[PhenotypicFeature]: List of HPO terms</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">242</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">242</span>
 <span class="normal">243</span>
 <span class="normal">244</span>
 <span class="normal">245</span>
@@ -4909,17 +5082,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.phenotypic_features"
     <span class="k">else</span><span class="p">:</span>
         <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">phenopacket_contents</span><span class="o">.</span><span class="n">phenotypic_features</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.sample_id" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">sample_id</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">sample_id</span><span class="p">()</span></code>
 
 </h3>
 
@@ -4928,6 +5102,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.sample_id" class="doc
   
       <p>Retrieve the sample ID from a Phenopacket or proband of a Family</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -4941,14 +5117,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.sample_id" class="doc
 <td><code>str</code></td>          <td>
                 <code>str</code>
           </td>
-          <td><p>Sample ID</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Sample ID</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">230</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">230</span>
 <span class="normal">231</span>
 <span class="normal">232</span>
 <span class="normal">233</span>
@@ -4970,17 +5150,18 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.sample_id" class="doc
     <span class="k">else</span><span class="p">:</span>
         <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">phenopacket_contents</span><span class="o">.</span><span class="n">subject</span><span class="o">.</span><span class="n">id</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">vcf_file_data</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">vcf_dir</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">vcf_file_data</span><span class="p">(</span><span class="n">phenopacket_path</span><span class="p">,</span> <span class="n">vcf_dir</span><span class="p">)</span></code>
 
 </h3>
 
@@ -4989,6 +5170,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class=
   
       <p>Retrieve the genome assembly and VCF file name from a phenopacket.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -5005,7 +5188,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class=
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The path to the phenopacket file.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The path to the phenopacket file.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5015,7 +5202,11 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class=
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The directory path where the VCF file is stored.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The directory path where the VCF file is stored.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5023,6 +5214,8 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class=
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -5036,11 +5229,17 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class=
 <td><code>File</code></td>          <td>
                 <code><span title="phenopackets.File">File</span></code>
           </td>
-          <td><p>The VCF file with updated URI pointing to the specified directory.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The VCF file with updated URI pointing to the specified directory.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
+
+
   <p><strong>Raises:</strong></p>
   <table>
     <thead>
@@ -5054,26 +5253,34 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class=
           <td>
                 <code><span title="pheval.prepare.custom_exceptions.IncorrectFileFormatError">IncorrectFileFormatError</span></code>
           </td>
-          <td><p>If the provided file is not in .vcf or .vcf.gz format.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If the provided file is not in .vcf or .vcf.gz format.</p>
+            </div>
+          </td>
         </tr>
         <tr>
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError" href="#src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError">IncompatibleGenomeAssemblyError</a></code>
           </td>
-          <td><p>If the genome assembly of the VCF file is not compatible.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>If the genome assembly of the VCF file is not compatible.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-<details class="note">
+<details class="note" open>
   <summary>Note</summary>
   <p>This function searches for a VCF file within the provided list of files, validates its format,
 and checks if the genome assembly is compatible. If the conditions are met, it updates the
 URI of the VCF file to the specified directory and returns the modified file object.</p>
 </details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">389</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">389</span>
 <span class="normal">390</span>
 <span class="normal">391</span>
 <span class="normal">392</span>
@@ -5137,7 +5344,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class=
     <span class="n">vcf_data</span><span class="o">.</span><span class="n">uri</span> <span class="o">=</span> <span class="nb">str</span><span class="p">(</span><span class="n">vcf_dir</span><span class="o">.</span><span class="n">joinpath</span><span class="p">(</span><span class="n">Path</span><span class="p">(</span><span class="n">vcf_data</span><span class="o">.</span><span class="n">uri</span><span class="p">)</span><span class="o">.</span><span class="n">name</span><span class="p">))</span>
     <span class="k">return</span> <span class="n">vcf_data</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
@@ -5148,6 +5355,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class=
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -5155,7 +5363,7 @@ <h3 id="src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data" class=
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeGene" class="doc doc-heading">
-        <code>ProbandCausativeGene</code>
+          <code>ProbandCausativeGene</code>
 
   
   <span class="doc doc-labels">
@@ -5170,6 +5378,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeGene" class="doc doc-
   
       <p>Represents a causative gene associated with a proband</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -5186,7 +5396,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeGene" class="doc doc-
           <td>
                 <code>str</code>
           </td>
-          <td><p>Symbol representing the gene</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Symbol representing the gene</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5196,24 +5410,25 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeGene" class="doc doc-
           <td>
                 <code>str</code>
           </td>
-          <td><p>The ENSEMBL gene identifier for the result entry</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The ENSEMBL gene identifier for the result entry</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
         </tr>
     </tbody>
   </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>While we recommend providing the gene identifier in the ENSEMBL namespace,
-any matching format used in Phenopacket interpretations and result output is acceptable
-for result matching purposes in the analysis.</p>
-</details>
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 88</span>
+      <p>Notes:
+     While we recommend providing the gene identifier in the ENSEMBL namespace,
+     any matching format used in Phenopacket interpretations and result output is acceptable
+     for result matching purposes in the analysis.</p>
+
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 88</span>
 <span class="normal"> 89</span>
 <span class="normal"> 90</span>
 <span class="normal"> 91</span>
@@ -5245,7 +5460,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeGene" class="doc doc-
     <span class="n">gene_symbol</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">gene_identifier</span><span class="p">:</span> <span class="nb">str</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -5265,6 +5480,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeGene" class="doc doc-
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -5272,7 +5488,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeGene" class="doc doc-
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc doc-heading">
-        <code>ProbandCausativeVariant</code>
+          <code>ProbandCausativeVariant</code>
 
   
   <span class="doc doc-labels">
@@ -5287,6 +5503,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
   
       <p>Represents a causative variant associated with a proband</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -5303,7 +5521,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
           <td>
                 <code>str</code>
           </td>
-          <td><p>ID of the proband</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>ID of the proband</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5313,7 +5535,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
           <td>
                 <code>str</code>
           </td>
-          <td><p>Genome assembly</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Genome assembly</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5323,7 +5549,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
           <td>
                 <code><a class="autorefs autorefs-internal" title="src.pheval.utils.phenopacket_utils.GenomicVariant" href="#src.pheval.utils.phenopacket_utils.GenomicVariant">GenomicVariant</a></code>
           </td>
-          <td><p>Genomic variant associated with the proband</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Genomic variant associated with the proband</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5333,7 +5563,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
           <td>
                 <code>str</code>
           </td>
-          <td><p>Genotype information for the variant</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Genotype information for the variant</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5343,7 +5577,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
           <td>
                 <code>str</code>
           </td>
-          <td><p>Additional information about the variant (default is an empty string)</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Additional information about the variant (default is an empty string)</p>
+            </div>
+          </td>
           <td>
                 <code>&#39;&#39;</code>
           </td>
@@ -5351,10 +5589,9 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
     </tbody>
   </table>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">68</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">68</span>
 <span class="normal">69</span>
 <span class="normal">70</span>
 <span class="normal">71</span>
@@ -5390,7 +5627,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
     <span class="n">genotype</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">info</span><span class="p">:</span> <span class="nb">str</span> <span class="o">=</span> <span class="s2">&quot;&quot;</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -5410,6 +5647,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
 
   </div>
 
+
 </div>
 
 <div class="doc doc-object doc-class">
@@ -5417,7 +5655,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandCausativeVariant" class="doc d
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.ProbandDisease" class="doc doc-heading">
-        <code>ProbandDisease</code>
+          <code>ProbandDisease</code>
 
   
   <span class="doc doc-labels">
@@ -5432,6 +5670,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandDisease" class="doc doc-headin
   
       <p>Represents a disease associated with a proband</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -5448,7 +5688,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandDisease" class="doc doc-headin
           <td>
                 <code>str</code>
           </td>
-          <td><p>Name of the disease</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Name of the disease</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5458,7 +5702,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandDisease" class="doc doc-headin
           <td>
                 <code>str</code>
           </td>
-          <td><p>Identifier for the disease result entry in the OMIM namespace</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Identifier for the disease result entry in the OMIM namespace</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5466,16 +5714,15 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandDisease" class="doc doc-headin
     </tbody>
   </table>
 
-<details class="notes">
+<details class="notes" open>
   <summary>Notes</summary>
   <p>While we recommend providing the disease identifier in the OMIM namespace,
 any matching format used in Phenopacket interpretations and result output is acceptable
 for result matching purposes in the analysis.</p>
 </details>
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">106</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">106</span>
 <span class="normal">107</span>
 <span class="normal">108</span>
 <span class="normal">109</span>
@@ -5509,7 +5756,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandDisease" class="doc doc-headin
     <span class="n">disease_name</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">disease_identifier</span><span class="p">:</span> <span class="nb">str</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -5529,15 +5776,17 @@ <h2 id="src.pheval.utils.phenopacket_utils.ProbandDisease" class="doc doc-headin
 
   </div>
 
+
 </div>
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.create_gene_identifier_map" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_gene_identifier_map</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">create_gene_identifier_map</span><span class="p">()</span></code>
 
 </h2>
 
@@ -5546,6 +5795,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_gene_identifier_map" class="do
   
       <p>Create a mapping of gene identifiers to gene symbols using HGNC data.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -5559,12 +5810,16 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_gene_identifier_map" class="do
 <td><code>dict</code></td>          <td>
                 <code>dict</code>
           </td>
-          <td><p>A mapping of gene identifiers to gene symbols.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A mapping of gene identifiers to gene symbols.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-<details class="notes">
+<details class="notes" open>
   <summary>Notes</summary>
   <p>The dictionary structure:
 {
@@ -5572,9 +5827,9 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_gene_identifier_map" class="do
     ...
 }</p>
 </details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">176</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">176</span>
 <span class="normal">177</span>
 <span class="normal">178</span>
 <span class="normal">179</span>
@@ -5618,17 +5873,18 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_gene_identifier_map" class="do
         <span class="n">identifier_map</span><span class="p">[</span><span class="n">row</span><span class="p">[</span><span class="s2">&quot;refseq_accession&quot;</span><span class="p">]]</span> <span class="o">=</span> <span class="n">row</span><span class="p">[</span><span class="s2">&quot;symbol&quot;</span><span class="p">]</span>
     <span class="k">return</span> <span class="n">identifier_map</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.create_hgnc_dict" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_hgnc_dict</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">create_hgnc_dict</span><span class="p">()</span></code>
 
 </h2>
 
@@ -5637,6 +5893,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_hgnc_dict" class="doc doc-head
   
       <p>Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -5650,12 +5908,16 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_hgnc_dict" class="doc doc-head
 <td><code>defaultdict</code></td>          <td>
                 <code><span title="collections.defaultdict">defaultdict</span></code>
           </td>
-          <td><p>A dictionary containing gene symbols as keys and their associated gene information.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>A dictionary containing gene symbols as keys and their associated gene information.</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-<details class="notes">
+<details class="notes" open>
   <summary>Notes</summary>
   <p>The dictionary structure:
 {
@@ -5669,9 +5931,9 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_hgnc_dict" class="doc doc-head
     ...
 }</p>
 </details>
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">139</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">139</span>
 <span class="normal">140</span>
 <span class="normal">141</span>
 <span class="normal">142</span>
@@ -5741,17 +6003,18 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_hgnc_dict" class="doc doc-head
 
     <span class="k">return</span> <span class="n">hgnc_data</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.create_json_message" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">create_json_message</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">create_json_message</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">)</span></code>
 
 </h2>
 
@@ -5759,16 +6022,14 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_json_message" class="doc doc-h
   <div class="doc doc-contents ">
   
       <p>Create a JSON message for writing to a file.</p>
-      <ul>
-<li>phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON.</li>
-</ul>
-      <ul>
-<li>str: A JSON-formatted string representation of the Phenopacket or Family object.</li>
-</ul>
-
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">608</span>
+<p>Args:</p>
+<ul><li>phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON.</li></ul>
+<p>Returns:</p>
+<ul><li>str: A JSON-formatted string representation of the Phenopacket or Family object.</li></ul>
+
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">608</span>
 <span class="normal">609</span>
 <span class="normal">610</span>
 <span class="normal">611</span>
@@ -5790,17 +6051,18 @@ <h2 id="src.pheval.utils.phenopacket_utils.create_json_message" class="doc doc-h
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="n">MessageToJson</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.phenopacket_reader" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">phenopacket_reader</span><span class="p">(</span><span class="n">file</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">phenopacket_reader</span><span class="p">(</span><span class="n">file</span><span class="p">)</span></code>
 
 </h2>
 
@@ -5809,6 +6071,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.phenopacket_reader" class="doc doc-he
   
       <p>Read a Phenopacket file and returns its contents as a Phenopacket or Family object</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -5825,7 +6089,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.phenopacket_reader" class="doc doc-he
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Path to the Phenopacket file</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Path to the Phenopacket file</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -5833,6 +6101,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.phenopacket_reader" class="doc doc-he
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -5846,14 +6116,18 @@ <h2 id="src.pheval.utils.phenopacket_utils.phenopacket_reader" class="doc doc-he
           <td>
                 <code><span title="typing.Union">Union</span>[<span title="phenopackets.Phenopacket">Phenopacket</span>, <span title="phenopackets.Family">Family</span>]</code>
           </td>
-          <td><p>Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">200</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">200</span>
 <span class="normal">201</span>
 <span class="normal">202</span>
 <span class="normal">203</span>
@@ -5887,17 +6161,18 @@ <h2 id="src.pheval.utils.phenopacket_utils.phenopacket_reader" class="doc doc-he
     <span class="k">else</span><span class="p">:</span>
         <span class="k">return</span> <span class="n">Parse</span><span class="p">(</span><span class="n">json</span><span class="o">.</span><span class="n">dumps</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">),</span> <span class="n">Phenopacket</span><span class="p">())</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.read_hgnc_data" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">read_hgnc_data</span><span class="p">()</span></code>
+          <code class="highlight language-python"><span class="n">read_hgnc_data</span><span class="p">()</span></code>
 
 </h2>
 
@@ -5906,6 +6181,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.read_hgnc_data" class="doc doc-headin
   
       <p>Read HGNC data from a file and return it as a Pandas DataFrame.</p>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -5917,16 +6194,20 @@ <h2 id="src.pheval.utils.phenopacket_utils.read_hgnc_data" class="doc doc-headin
     <tbody>
         <tr>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>pd.DataFrame: DataFrame containing the HGNC data.</p>
+            </div>
           </td>
-          <td><p>pd.DataFrame: DataFrame containing the HGNC data.</p></td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">125</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">125</span>
 <span class="normal">126</span>
 <span class="normal">127</span>
 <span class="normal">128</span>
@@ -5950,17 +6231,18 @@ <h2 id="src.pheval.utils.phenopacket_utils.read_hgnc_data" class="doc doc-headin
         <span class="n">dtype</span><span class="o">=</span><span class="nb">str</span><span class="p">,</span>
     <span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.phenopacket_utils.write_phenopacket" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">write_phenopacket</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">write_phenopacket</span><span class="p">(</span><span class="n">phenopacket</span><span class="p">,</span> <span class="n">output_file</span><span class="p">)</span></code>
 
 </h2>
 
@@ -5969,6 +6251,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.write_phenopacket" class="doc doc-hea
   
       <p>Write a Phenopacket or Family object to a file in JSON format.</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -5983,9 +6267,13 @@ <h2 id="src.pheval.utils.phenopacket_utils.write_phenopacket" class="doc doc-hea
         <tr>
           <td><code>phenopacket</code></td>
           <td>
-                <code>Phenopacket or Family</code>
+                <code><span title="phenopackets.Phenopacket">Phenopacket</span> or <span title="phenopackets.Family">Family</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>The Phenopacket or Family object to be written.</p>
+            </div>
           </td>
-          <td><p>The Phenopacket or Family object to be written.</p></td>
           <td>
               <em>required</em>
           </td>
@@ -5995,7 +6283,11 @@ <h2 id="src.pheval.utils.phenopacket_utils.write_phenopacket" class="doc doc-hea
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>The Path object representing the file to write the Phenopacket data.</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>The Path object representing the file to write the Phenopacket data.</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -6003,6 +6295,8 @@ <h2 id="src.pheval.utils.phenopacket_utils.write_phenopacket" class="doc doc-hea
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -6016,14 +6310,18 @@ <h2 id="src.pheval.utils.phenopacket_utils.write_phenopacket" class="doc doc-hea
           <td>
                 <code>None</code>
           </td>
-          <td><p>None</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>None</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">621</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">621</span>
 <span class="normal">622</span>
 <span class="normal">623</span>
 <span class="normal">624</span>
@@ -6053,7 +6351,7 @@ <h2 id="src.pheval.utils.phenopacket_utils.write_phenopacket" class="doc doc-hea
         <span class="n">outfile</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">phenopacket_json</span><span class="p">)</span>
     <span class="n">outfile</span><span class="o">.</span><span class="n">close</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/utils/semsim_utils/index.html b/api/pheval/utils/semsim_utils/index.html
index 6fcff257e..aea320e1e 100644
--- a/api/pheval/utils/semsim_utils/index.html
+++ b/api/pheval/utils/semsim_utils/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/utils/semsim_utils/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -1020,63 +1020,63 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils" class="md-nav__link">
-    src.pheval.utils.semsim_utils
+    semsim_utils
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.diff_semsim" class="md-nav__link">
-    diff_semsim()
+    diff_semsim
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.filter_non_0_score" class="md-nav__link">
-    filter_non_0_score()
+    filter_non_0_score
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.get_percentage_diff" class="md-nav__link">
-    get_percentage_diff()
+    get_percentage_diff
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.parse_semsim" class="md-nav__link">
-    parse_semsim()
+    parse_semsim
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.percentage_diff" class="md-nav__link">
-    percentage_diff()
+    percentage_diff
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.semsim_analysis" class="md-nav__link">
-    semsim_analysis()
+    semsim_analysis
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.semsim_heatmap_plot" class="md-nav__link">
-    semsim_heatmap_plot()
+    semsim_heatmap_plot
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.validate_semsim_file_comparison" class="md-nav__link">
-    validate_semsim_file_comparison()
+    validate_semsim_file_comparison
   </a>
   
 </li>
@@ -1263,63 +1263,63 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils" class="md-nav__link">
-    src.pheval.utils.semsim_utils
+    semsim_utils
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.diff_semsim" class="md-nav__link">
-    diff_semsim()
+    diff_semsim
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.filter_non_0_score" class="md-nav__link">
-    filter_non_0_score()
+    filter_non_0_score
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.get_percentage_diff" class="md-nav__link">
-    get_percentage_diff()
+    get_percentage_diff
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.parse_semsim" class="md-nav__link">
-    parse_semsim()
+    parse_semsim
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.percentage_diff" class="md-nav__link">
-    percentage_diff()
+    percentage_diff
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.semsim_analysis" class="md-nav__link">
-    semsim_analysis()
+    semsim_analysis
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.semsim_heatmap_plot" class="md-nav__link">
-    semsim_heatmap_plot()
+    semsim_heatmap_plot
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.semsim_utils.validate_semsim_file_comparison" class="md-nav__link">
-    validate_semsim_file_comparison()
+    validate_semsim_file_comparison
   </a>
   
 </li>
@@ -1349,6 +1349,7 @@ <h1>Semsim utils</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.utils.semsim_utils"></a>
   <div class="doc doc-contents first">
   
@@ -1366,12 +1367,13 @@ <h1>Semsim utils</h1>
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.semsim_utils.diff_semsim" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">diff_semsim</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">,</span> <span class="n">absolute_diff</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">diff_semsim</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">,</span> <span class="n">absolute_diff</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1380,6 +1382,8 @@ <h2 id="src.pheval.utils.semsim_utils.diff_semsim" class="doc doc-heading">
   
       <p>Calculates score difference between two semantic similarity profiles</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1394,9 +1398,13 @@ <h2 id="src.pheval.utils.semsim_utils.diff_semsim" class="doc doc-heading">
         <tr>
           <td><code>semsim_left</code></td>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>first semantic similarity dataframe</p>
+            </div>
           </td>
-          <td><p>first semantic similarity dataframe</p></td>
           <td>
               <em>required</em>
           </td>
@@ -1404,9 +1412,13 @@ <h2 id="src.pheval.utils.semsim_utils.diff_semsim" class="doc doc-heading">
         <tr>
           <td><code>semsim_right</code></td>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>second semantic similarity dataframe</p>
+            </div>
           </td>
-          <td><p>second semantic similarity dataframe</p></td>
           <td>
               <em>required</em>
           </td>
@@ -1416,7 +1428,11 @@ <h2 id="src.pheval.utils.semsim_utils.diff_semsim" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>Score column that will be computed (e.g. jaccard_similarity)</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Score column that will be computed (e.g. jaccard_similarity)</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1426,7 +1442,11 @@ <h2 id="src.pheval.utils.semsim_utils.diff_semsim" class="doc doc-heading">
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Whether the difference is absolute (True) or percentage (False).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Whether the difference is absolute (True) or percentage (False).</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1434,6 +1454,8 @@ <h2 id="src.pheval.utils.semsim_utils.diff_semsim" class="doc doc-heading">
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1445,16 +1467,20 @@ <h2 id="src.pheval.utils.semsim_utils.diff_semsim" class="doc doc-heading">
     <tbody>
         <tr>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>pd.DataFrame: A dataframe with terms and its scores differences</p>
+            </div>
           </td>
-          <td><p>pd.DataFrame: A dataframe with terms and its scores differences</p></td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">42</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">42</span>
 <span class="normal">43</span>
 <span class="normal">44</span>
 <span class="normal">45</span>
@@ -1500,17 +1526,18 @@ <h2 id="src.pheval.utils.semsim_utils.diff_semsim" class="doc doc-heading">
     <span class="p">)</span>
     <span class="k">return</span> <span class="n">df</span><span class="p">[[</span><span class="s2">&quot;subject_id&quot;</span><span class="p">,</span> <span class="s2">&quot;object_id&quot;</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">score_column</span><span class="si">}</span><span class="s2">_x&quot;</span><span class="p">,</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">score_column</span><span class="si">}</span><span class="s2">_y&quot;</span><span class="p">,</span> <span class="s2">&quot;diff&quot;</span><span class="p">]]</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.semsim_utils.filter_non_0_score" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">filter_non_0_score</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">filter_non_0_score</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">col</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1519,6 +1546,8 @@ <h2 id="src.pheval.utils.semsim_utils.filter_non_0_score" class="doc doc-heading
   
       <p>Removes rows that have value equal to 0 based on the given column passed by col parameter</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1533,9 +1562,13 @@ <h2 id="src.pheval.utils.semsim_utils.filter_non_0_score" class="doc doc-heading
         <tr>
           <td><code>data</code></td>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>Dirty dataframe</p>
+            </div>
           </td>
-          <td><p>Dirty dataframe</p></td>
           <td>
               <em>required</em>
           </td>
@@ -1545,7 +1578,11 @@ <h2 id="src.pheval.utils.semsim_utils.filter_non_0_score" class="doc doc-heading
           <td>
                 <code>str</code>
           </td>
-          <td><p>Column to be filtered</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Column to be filtered</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1553,6 +1590,8 @@ <h2 id="src.pheval.utils.semsim_utils.filter_non_0_score" class="doc doc-heading
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1564,16 +1603,20 @@ <h2 id="src.pheval.utils.semsim_utils.filter_non_0_score" class="doc doc-heading
     <tbody>
         <tr>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>pd.DataFrame: Filtered dataframe</p>
+            </div>
           </td>
-          <td><p>pd.DataFrame: Filtered dataframe</p></td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">14</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">14</span>
 <span class="normal">15</span>
 <span class="normal">16</span>
 <span class="normal">17</span>
@@ -1595,17 +1638,18 @@ <h2 id="src.pheval.utils.semsim_utils.filter_non_0_score" class="doc doc-heading
 <span class="sd">    &quot;&quot;&quot;</span>
     <span class="k">return</span> <span class="n">data</span><span class="p">[</span><span class="n">data</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">!=</span> <span class="mi">0</span><span class="p">]</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.semsim_utils.get_percentage_diff" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">get_percentage_diff</span><span class="p">(</span><span class="n">current_number</span><span class="p">,</span> <span class="n">previous_number</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">get_percentage_diff</span><span class="p">(</span><span class="n">current_number</span><span class="p">,</span> <span class="n">previous_number</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1614,6 +1658,8 @@ <h2 id="src.pheval.utils.semsim_utils.get_percentage_diff" class="doc doc-headin
   
       <p>Gets the percentage difference between two numbers</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1630,7 +1676,11 @@ <h2 id="src.pheval.utils.semsim_utils.get_percentage_diff" class="doc doc-headin
           <td>
                 <code>float</code>
           </td>
-          <td><p>second number in comparison</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>second number in comparison</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1640,7 +1690,11 @@ <h2 id="src.pheval.utils.semsim_utils.get_percentage_diff" class="doc doc-headin
           <td>
                 <code>float</code>
           </td>
-          <td><p>first number in comparison</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>first number in comparison</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1648,6 +1702,8 @@ <h2 id="src.pheval.utils.semsim_utils.get_percentage_diff" class="doc doc-headin
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1661,14 +1717,18 @@ <h2 id="src.pheval.utils.semsim_utils.get_percentage_diff" class="doc doc-headin
 <td><code>float</code></td>          <td>
                 <code>float</code>
           </td>
-          <td><p>percentage difference between two numbers</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>percentage difference between two numbers</p>
+            </div>
+          </td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">138</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">138</span>
 <span class="normal">139</span>
 <span class="normal">140</span>
 <span class="normal">141</span>
@@ -1708,17 +1768,18 @@ <h2 id="src.pheval.utils.semsim_utils.get_percentage_diff" class="doc doc-headin
     <span class="k">except</span> <span class="ne">ZeroDivisionError</span><span class="p">:</span>
         <span class="k">return</span> <span class="kc">None</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.semsim_utils.parse_semsim" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">parse_semsim</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">cols</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">parse_semsim</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">cols</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1727,6 +1788,8 @@ <h2 id="src.pheval.utils.semsim_utils.parse_semsim" class="doc doc-heading">
   
       <p>Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1741,9 +1804,13 @@ <h2 id="src.pheval.utils.semsim_utils.parse_semsim" class="doc doc-heading">
         <tr>
           <td><code>df</code></td>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>semantic similarity profile dataframe</p>
+            </div>
           </td>
-          <td><p>semantic similarity profile dataframe</p></td>
           <td>
               <em>required</em>
           </td>
@@ -1753,7 +1820,11 @@ <h2 id="src.pheval.utils.semsim_utils.parse_semsim" class="doc doc-heading">
           <td>
                 <code>list</code>
           </td>
-          <td><p>list of columns that will be selected on semsim data</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>list of columns that will be selected on semsim data</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1761,6 +1832,8 @@ <h2 id="src.pheval.utils.semsim_utils.parse_semsim" class="doc doc-heading">
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1772,16 +1845,20 @@ <h2 id="src.pheval.utils.semsim_utils.parse_semsim" class="doc doc-heading">
     <tbody>
         <tr>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>pd.Dataframe: parsed semantic similarity dataframe</p>
+            </div>
           </td>
-          <td><p>pd.Dataframe: parsed semantic similarity dataframe</p></td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">27</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">27</span>
 <span class="normal">28</span>
 <span class="normal">29</span>
 <span class="normal">30</span>
@@ -1807,17 +1884,18 @@ <h2 id="src.pheval.utils.semsim_utils.parse_semsim" class="doc doc-heading">
     <span class="n">df</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;None&quot;</span><span class="p">,</span> <span class="n">numpy</span><span class="o">.</span><span class="n">nan</span><span class="p">)</span><span class="o">.</span><span class="n">dropna</span><span class="p">(</span><span class="n">subset</span><span class="o">=</span><span class="n">cols</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">],</span> <span class="n">inplace</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">df</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.semsim_utils.percentage_diff" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">percentage_diff</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">,</span> <span class="n">output</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">percentage_diff</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">,</span> <span class="n">output</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1826,6 +1904,8 @@ <h2 id="src.pheval.utils.semsim_utils.percentage_diff" class="doc doc-heading">
   
       <p>Compares two semantic similarity profiles</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1842,7 +1922,11 @@ <h2 id="src.pheval.utils.semsim_utils.percentage_diff" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>File path of the first semantic similarity profile</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>File path of the first semantic similarity profile</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1852,7 +1936,11 @@ <h2 id="src.pheval.utils.semsim_utils.percentage_diff" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>File path of the second semantic similarity profile</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>File path of the second semantic similarity profile</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1862,7 +1950,11 @@ <h2 id="src.pheval.utils.semsim_utils.percentage_diff" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>Score column that will be computed (e.g. jaccard_similarity)</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Score column that will be computed (e.g. jaccard_similarity)</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1872,7 +1964,11 @@ <h2 id="src.pheval.utils.semsim_utils.percentage_diff" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>Output path for the difference tsv file</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Output path for the difference tsv file</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1880,9 +1976,9 @@ <h2 id="src.pheval.utils.semsim_utils.percentage_diff" class="doc doc-heading">
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">67</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">67</span>
 <span class="normal">68</span>
 <span class="normal">69</span>
 <span class="normal">70</span>
@@ -1904,17 +2000,18 @@ <h2 id="src.pheval.utils.semsim_utils.percentage_diff" class="doc doc-heading">
     <span class="n">clean_df</span> <span class="o">=</span> <span class="n">semsim_analysis</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">,</span> <span class="n">absolute_diff</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
     <span class="n">clean_df</span><span class="o">.</span><span class="n">sort_values</span><span class="p">(</span><span class="n">by</span><span class="o">=</span><span class="s2">&quot;diff&quot;</span><span class="p">,</span> <span class="n">ascending</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s2">&quot;</span><span class="se">\t</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.semsim_utils.semsim_analysis" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">semsim_analysis</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">,</span> <span class="n">absolute_diff</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">semsim_analysis</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">,</span> <span class="n">absolute_diff</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1923,6 +2020,8 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_analysis" class="doc doc-heading">
   
       <p>semsim_analysis</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -1939,7 +2038,11 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_analysis" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>File path of the first semantic similarity profile</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>File path of the first semantic similarity profile</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1949,7 +2052,11 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_analysis" class="doc doc-heading">
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>File path of the second semantic similarity profile</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>File path of the second semantic similarity profile</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1959,7 +2066,11 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_analysis" class="doc doc-heading">
           <td>
                 <code>str</code>
           </td>
-          <td><p>Score column that will be computed (e.g. jaccard_similarity)</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Score column that will be computed (e.g. jaccard_similarity)</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -1969,7 +2080,11 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_analysis" class="doc doc-heading">
           <td>
                 <code>bool</code>
           </td>
-          <td><p>Whether the difference is absolute (True) or percentage (False).</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Whether the difference is absolute (True) or percentage (False).</p>
+            </div>
+          </td>
           <td>
                 <code>True</code>
           </td>
@@ -1977,6 +2092,8 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_analysis" class="doc doc-heading">
     </tbody>
   </table>
 
+
+
   <p><strong>Returns:</strong></p>
   <table>
     <thead>
@@ -1988,16 +2105,20 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_analysis" class="doc doc-heading">
     <tbody>
         <tr>
           <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
+                <code><span title="pandas.DataFrame">DataFrame</span></code>
+          </td>
+          <td>
+            <div class="doc-md-description">
+              <p>[pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles</p>
+            </div>
           </td>
-          <td><p>[pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles</p></td>
         </tr>
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 94</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 94</span>
 <span class="normal"> 95</span>
 <span class="normal"> 96</span>
 <span class="normal"> 97</span>
@@ -2053,17 +2174,18 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_analysis" class="doc doc-heading">
     <span class="n">diff_df</span> <span class="o">=</span> <span class="n">diff_semsim</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">,</span> <span class="n">absolute_diff</span><span class="p">)</span>
     <span class="k">return</span> <span class="n">filter_non_0_score</span><span class="p">(</span><span class="n">diff_df</span><span class="p">,</span> <span class="s2">&quot;diff&quot;</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.semsim_utils.semsim_heatmap_plot" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">semsim_heatmap_plot</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">semsim_heatmap_plot</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">,</span> <span class="n">score_column</span><span class="p">)</span></code>
 
 </h2>
 
@@ -2072,6 +2194,8 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_heatmap_plot" class="doc doc-headin
   
       <p>Plots semantic similarity profiles heatmap</p>
 
+
+
   <p><strong>Parameters:</strong></p>
   <table>
     <thead>
@@ -2088,7 +2212,11 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_heatmap_plot" class="doc doc-headin
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>File path of the first semantic similarity profile</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>File path of the first semantic similarity profile</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2098,7 +2226,11 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_heatmap_plot" class="doc doc-headin
           <td>
                 <code><span title="pathlib.Path">Path</span></code>
           </td>
-          <td><p>File path of the second semantic similarity profile</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>File path of the second semantic similarity profile</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2108,7 +2240,11 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_heatmap_plot" class="doc doc-headin
           <td>
                 <code>str</code>
           </td>
-          <td><p>Score column that will be computed (e.g. jaccard_similarity)</p></td>
+          <td>
+            <div class="doc-md-description">
+              <p>Score column that will be computed (e.g. jaccard_similarity)</p>
+            </div>
+          </td>
           <td>
               <em>required</em>
           </td>
@@ -2116,9 +2252,9 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_heatmap_plot" class="doc doc-headin
     </tbody>
   </table>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">80</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">80</span>
 <span class="normal">81</span>
 <span class="normal">82</span>
 <span class="normal">83</span>
@@ -2142,80 +2278,34 @@ <h2 id="src.pheval.utils.semsim_utils.semsim_heatmap_plot" class="doc doc-headin
     <span class="n">fig</span> <span class="o">=</span> <span class="n">px</span><span class="o">.</span><span class="n">imshow</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">text_auto</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span>
     <span class="n">fig</span><span class="o">.</span><span class="n">show</span><span class="p">()</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.semsim_utils.validate_semsim_file_comparison" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">validate_semsim_file_comparison</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">validate_semsim_file_comparison</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">)</span></code>
 
 </h2>
 
 
   <div class="doc doc-contents ">
   
-      <p>Checks if files exist and whether they're different</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>semsim_left</code></td>
-          <td>
-                <code><span title="pathlib.Path">Path</span></code>
-          </td>
-          <td><p>File path of the first semantic similarity profile</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>semsim_right</code></td>
-          <td>
-                <code><span title="pathlib.Path">Path</span></code>
-          </td>
-          <td><p>File path of the second semantic similarity profile</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-  <p><strong>Raises:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Type</th>
-        <th>Description</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td>
-                <code>Exception</code>
-          </td>
-          <td><p>FileNotFoundException</p></td>
-        </tr>
-    </tbody>
-  </table>
+      <p>Checks if files exist and whether they're different
+Args:
+    semsim_left (Path): File path of the first semantic similarity profile
+    semsim_right (Path): File path of the second semantic similarity profile
+Raises:
+    Exception: FileNotFoundException</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">124</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/semsim_utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">124</span>
 <span class="normal">125</span>
 <span class="normal">126</span>
 <span class="normal">127</span>
@@ -2239,7 +2329,7 @@ <h2 id="src.pheval.utils.semsim_utils.validate_semsim_file_comparison" class="do
         <span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="n">errmsg</span><span class="p">)</span>
     <span class="n">file_utils</span><span class="o">.</span><span class="n">ensure_file_exists</span><span class="p">(</span><span class="n">semsim_left</span><span class="p">,</span> <span class="n">semsim_right</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/api/pheval/utils/utils/index.html b/api/pheval/utils/utils/index.html
index b6d67a6db..654b7e880 100644
--- a/api/pheval/utils/utils/index.html
+++ b/api/pheval/utils/utils/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/api/pheval/utils/utils/">
       
       <link rel="icon" href="../../../../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -1034,28 +1034,28 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.utils" class="md-nav__link">
-    src.pheval.utils.utils
+    utils
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.utils.rand" class="md-nav__link">
-    rand()
+    rand
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.utils.semsim_scramble" class="md-nav__link">
-    semsim_scramble()
+    semsim_scramble
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.utils.semsim_scramble_df" class="md-nav__link">
-    semsim_scramble_df()
+    semsim_scramble_df
   </a>
   
 </li>
@@ -1228,28 +1228,28 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.utils" class="md-nav__link">
-    src.pheval.utils.utils
+    utils
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.utils.rand" class="md-nav__link">
-    rand()
+    rand
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.utils.semsim_scramble" class="md-nav__link">
-    semsim_scramble()
+    semsim_scramble
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.utils.semsim_scramble_df" class="md-nav__link">
-    semsim_scramble_df()
+    semsim_scramble_df
   </a>
   
 </li>
@@ -1279,6 +1279,7 @@ <h1>Utils</h1>
 <div class="doc doc-object doc-module">
 
 
+
 <a id="src.pheval.utils.utils"></a>
   <div class="doc doc-contents first">
   
@@ -1296,95 +1297,31 @@ <h1>Utils</h1>
 
 
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.utils.rand" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">rand</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">min_num</span><span class="p">,</span> <span class="n">max_num</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">rand</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">min_num</span><span class="p">,</span> <span class="n">max_num</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
 
 </h2>
 
 
   <div class="doc doc-contents ">
   
-      <p>Numeric scrambling</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>df</code></td>
-          <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
-          </td>
-          <td><p>dataframe records</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>min_num</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>min value from this records</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>max_num</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>max value from this records</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>scramble_factor</code></td>
-          <td>
-                <code>float</code>
-          </td>
-          <td><p>scramble factor scalar</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-  <p><strong>Returns:</strong></p>
-  <table>
-    <thead>
-      <tr>
-<th>Name</th>        <th>Type</th>
-        <th>Description</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-<td><code>float</code></td>          <td>
-                <code>float</code>
-          </td>
-          <td><p>randomized number</p></td>
-        </tr>
-    </tbody>
-  </table>
-
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">13</span>
+      <p>Numeric scrambling
+Args:
+    df (pd.DataFrame): dataframe records
+    min_num (int): min value from these records
+    max_num (int): max value from these records
+    scramble_factor (float): scramble factor scalar
+Returns:
+    float: randomized number</p>
+
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">13</span>
 <span class="normal">14</span>
 <span class="normal">15</span>
 <span class="normal">16</span>
@@ -1416,17 +1353,18 @@ <h2 id="src.pheval.utils.utils.rand" class="doc doc-heading">
         <span class="n">info_log</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">exc_info</span><span class="o">=</span><span class="n">err</span><span class="p">)</span>
         <span class="k">return</span> <span class="n">df</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.utils.semsim_scramble" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">semsim_scramble</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">,</span> <span class="n">columns_to_be_scrambled</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">semsim_scramble</span><span class="p">(</span><span class="nb">input</span><span class="p">,</span> <span class="n">output</span><span class="p">,</span> <span class="n">columns_to_be_scrambled</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="o">=</span><span class="mf">0.5</span><span class="p">)</span></code>
 
 </h2>
 
@@ -1446,9 +1384,9 @@ <h2 id="src.pheval.utils.utils.semsim_scramble" class="doc doc-heading">
     Returns:
         pd.Dataframe: scrambled dataframe</p>
 
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">31</span>
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">31</span>
 <span class="normal">32</span>
 <span class="normal">33</span>
 <span class="normal">34</span>
@@ -1494,80 +1432,35 @@ <h2 id="src.pheval.utils.utils.semsim_scramble" class="doc doc-heading">
     <span class="n">dataframe</span> <span class="o">=</span> <span class="n">semsim_scramble_df</span><span class="p">(</span><span class="n">semsim</span><span class="p">,</span> <span class="n">columns_to_be_scrambled</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span>
     <span class="n">dataframe</span><span class="o">.</span><span class="n">to_csv</span><span class="p">(</span><span class="n">output</span><span class="p">,</span> <span class="n">sep</span><span class="o">=</span><span class="s2">&quot;</span><span class="se">\t</span><span class="s2">&quot;</span><span class="p">,</span> <span class="n">index</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
 
+
 <div class="doc doc-object doc-function">
 
 
 
 <h2 id="src.pheval.utils.utils.semsim_scramble_df" class="doc doc-heading">
-<code class="highlight language-python"><span class="n">semsim_scramble_df</span><span class="p">(</span><span class="n">dataframe</span><span class="p">,</span> <span class="n">columns_to_be_scrambled</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
+          <code class="highlight language-python"><span class="n">semsim_scramble_df</span><span class="p">(</span><span class="n">dataframe</span><span class="p">,</span> <span class="n">columns_to_be_scrambled</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">)</span></code>
 
 </h2>
 
 
   <div class="doc doc-contents ">
   
-      <p>scramble_semsim_df</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>dataframe</code></td>
-          <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
-          </td>
-          <td><p>dataframe that contains semsim profile</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>columns_to_be_scrambled</code></td>
-          <td>
-                <code><span title="typing.List">List</span>[str]</code>
-          </td>
-          <td></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-  <p><strong>Returns:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Type</th>
-        <th>Description</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td>
-                <code><span title="pandas">pd</span>.<span title="pandas.DataFrame">DataFrame</span></code>
-          </td>
-          <td><p>pd.Dataframe: scrambled dataframe</p></td>
-        </tr>
-    </tbody>
-  </table>
-
-      <details class="quote">
-        <summary>Source code in <code>src/pheval/utils/utils.py</code></summary>
-        <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">56</span>
+      <p>scramble_semsim_df
+Args:
+    dataframe (pd.DataFrame): dataframe that contains semsim profile
+    scramble_factor (float): scalar scramble factor
+    columns_to_be_scrambled (List[str]):
+Returns:
+    pd.DataFrame: scrambled dataframe</p>
+
+          <details class="quote">
+            <summary>Source code in <code>src/pheval/utils/utils.py</code></summary>
+            <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">56</span>
 <span class="normal">57</span>
 <span class="normal">58</span>
 <span class="normal">59</span>
@@ -1603,7 +1496,7 @@ <h2 id="src.pheval.utils.utils.semsim_scramble_df" class="doc doc-heading">
         <span class="n">dataframe</span><span class="p">[</span><span class="n">col</span><span class="p">]</span> <span class="o">=</span> <span class="n">dataframe</span><span class="p">[</span><span class="n">col</span><span class="p">]</span><span class="o">.</span><span class="n">apply</span><span class="p">(</span><span class="n">rand</span><span class="p">,</span> <span class="n">args</span><span class="o">=</span><span class="p">(</span><span class="n">min_num</span><span class="p">,</span> <span class="n">max_num</span><span class="p">,</span> <span class="n">scramble_factor</span><span class="p">))</span>
     <span class="k">return</span> <span class="n">dataframe</span>
 </code></pre></div></td></tr></table></div>
-      </details>
+          </details>
   </div>
 
 </div>
diff --git a/assets/_mkdocstrings.css b/assets/_mkdocstrings.css
index a65078d02..57a23e10f 100644
--- a/assets/_mkdocstrings.css
+++ b/assets/_mkdocstrings.css
@@ -1,18 +1,13 @@
 
-/* Don't capitalize names. */
-h5.doc-heading {
-  text-transform: none !important;
-}
-
-/* Avoid breaking parameters name, etc. in table cells. */
+/* Avoid breaking parameter names, etc. in table cells. */
 .doc-contents td code {
   word-break: normal !important;
 }
 
-/* For pieces of Markdown rendered in table cells. */
-.doc-contents td p {
-  margin-top: 0 !important;
-  margin-bottom: 0 !important;
+/* No line break before first paragraph of descriptions. */
+.doc-md-description,
+.doc-md-description>p:first-child {
+  display: inline;
 }
 
 /* Max width for docstring sections tables. */
@@ -21,16 +16,99 @@ h5.doc-heading {
   display: table !important;
   width: 100%;
 }
+
 .doc .md-typeset__table tr {
   display: table-row;
 }
 
-/* Avoid line breaks in rendered fields. */
-.field-body p {
-  display: inline;
-}
-
 /* Defaults in Spacy table style. */
 .doc-param-default {
   float: right;
 }
+
+/* Symbols in Navigation and ToC. */
+:root,
+[data-md-color-scheme="default"] {
+  --doc-symbol-attribute-fg-color: #953800;
+  --doc-symbol-function-fg-color: #8250df;
+  --doc-symbol-method-fg-color: #8250df;
+  --doc-symbol-class-fg-color: #0550ae;
+  --doc-symbol-module-fg-color: #5cad0f;
+
+  --doc-symbol-attribute-bg-color: #9538001a;
+  --doc-symbol-function-bg-color: #8250df1a;
+  --doc-symbol-method-bg-color: #8250df1a;
+  --doc-symbol-class-bg-color: #0550ae1a;
+  --doc-symbol-module-bg-color: #5cad0f1a;
+}
+
+[data-md-color-scheme="slate"] {
+  --doc-symbol-attribute-fg-color: #ffa657;
+  --doc-symbol-function-fg-color: #d2a8ff;
+  --doc-symbol-method-fg-color: #d2a8ff;
+  --doc-symbol-class-fg-color: #79c0ff;
+  --doc-symbol-module-fg-color: #baff79;
+
+  --doc-symbol-attribute-bg-color: #ffa6571a;
+  --doc-symbol-function-bg-color: #d2a8ff1a;
+  --doc-symbol-method-bg-color: #d2a8ff1a;
+  --doc-symbol-class-bg-color: #79c0ff1a;
+  --doc-symbol-module-bg-color: #baff791a;
+}
+
+code.doc-symbol {
+  border-radius: .1rem;
+  font-size: .85em;
+  padding: 0 .3em;
+  font-weight: bold;
+}
+
+code.doc-symbol-attribute {
+  color: var(--doc-symbol-attribute-fg-color);
+  background-color: var(--doc-symbol-attribute-bg-color);
+}
+
+code.doc-symbol-attribute::after {
+  content: "attr";
+}
+
+code.doc-symbol-function {
+  color: var(--doc-symbol-function-fg-color);
+  background-color: var(--doc-symbol-function-bg-color);
+}
+
+code.doc-symbol-function::after {
+  content: "func";
+}
+
+code.doc-symbol-method {
+  color: var(--doc-symbol-method-fg-color);
+  background-color: var(--doc-symbol-method-bg-color);
+}
+
+code.doc-symbol-method::after {
+  content: "meth";
+}
+
+code.doc-symbol-class {
+  color: var(--doc-symbol-class-fg-color);
+  background-color: var(--doc-symbol-class-bg-color);
+}
+
+code.doc-symbol-class::after {
+  content: "class";
+}
+
+code.doc-symbol-module {
+  color: var(--doc-symbol-module-fg-color);
+  background-color: var(--doc-symbol-module-bg-color);
+}
+
+code.doc-symbol-module::after {
+  content: "mod";
+}
+
+.doc-signature .autorefs {
+  color: inherit;
+  border-bottom: 1px dotted currentcolor;
+}
diff --git a/contact/index.html b/contact/index.html
index 65bf6eeb6..361582c00 100644
--- a/contact/index.html
+++ b/contact/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/contact/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/contributing/index.html b/contributing/index.html
index c23ff6239..e79080273 100644
--- a/contributing/index.html
+++ b/contributing/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/contributing/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/developing_a_pheval_plugin/index.html b/developing_a_pheval_plugin/index.html
index d0bb7b6b4..cc0c4369c 100644
--- a/developing_a_pheval_plugin/index.html
+++ b/developing_a_pheval_plugin/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/developing_a_pheval_plugin/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
@@ -1098,7 +1098,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner" class="md-nav__link">
-    src.pheval.runners.runner.PhEvalRunner
+    PhEvalRunner
   </a>
   
 </li>
@@ -1200,10 +1200,10 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil" class="md-nav__link">
-    src.pheval.utils.phenopacket_utils.PhenopacketUtil
+    PhenopacketUtil
   </a>
   
-    <nav class="md-nav" aria-label="src.pheval.utils.phenopacket_utils.PhenopacketUtil">
+    <nav class="md-nav" aria-label="PhenopacketUtil">
       <ul class="md-nav__list">
         
           <li class="md-nav__item">
@@ -1227,24 +1227,24 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.PhEvalGeneResult" class="md-nav__link">
-    src.pheval.post_processing.post_processing.PhEvalGeneResult
+    PhEvalGeneResult
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.PhEvalVariantResult" class="md-nav__link">
-    src.pheval.post_processing.post_processing.PhEvalVariantResult
+    PhEvalVariantResult
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.PhEvalDiseaseResult" class="md-nav__link">
-    src.pheval.post_processing.post_processing.PhEvalDiseaseResult
+    PhEvalDiseaseResult
   </a>
   
-    <nav class="md-nav" aria-label="src.pheval.post_processing.post_processing.PhEvalDiseaseResult">
+    <nav class="md-nav" aria-label="PhEvalDiseaseResult">
       <ul class="md-nav__list">
         
           <li class="md-nav__item">
@@ -1388,7 +1388,7 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.runners.runner.PhEvalRunner" class="md-nav__link">
-    src.pheval.runners.runner.PhEvalRunner
+    PhEvalRunner
   </a>
   
 </li>
@@ -1490,10 +1490,10 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.utils.phenopacket_utils.PhenopacketUtil" class="md-nav__link">
-    src.pheval.utils.phenopacket_utils.PhenopacketUtil
+    PhenopacketUtil
   </a>
   
-    <nav class="md-nav" aria-label="src.pheval.utils.phenopacket_utils.PhenopacketUtil">
+    <nav class="md-nav" aria-label="PhenopacketUtil">
       <ul class="md-nav__list">
         
           <li class="md-nav__item">
@@ -1517,24 +1517,24 @@
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.PhEvalGeneResult" class="md-nav__link">
-    src.pheval.post_processing.post_processing.PhEvalGeneResult
+    PhEvalGeneResult
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.PhEvalVariantResult" class="md-nav__link">
-    src.pheval.post_processing.post_processing.PhEvalVariantResult
+    PhEvalVariantResult
   </a>
   
 </li>
       
         <li class="md-nav__item">
   <a href="#src.pheval.post_processing.post_processing.PhEvalDiseaseResult" class="md-nav__link">
-    src.pheval.post_processing.post_processing.PhEvalDiseaseResult
+    PhEvalDiseaseResult
   </a>
   
-    <nav class="md-nav" aria-label="src.pheval.post_processing.post_processing.PhEvalDiseaseResult">
+    <nav class="md-nav" aria-label="PhEvalDiseaseResult">
       <ul class="md-nav__list">
         
           <li class="md-nav__item">
@@ -1586,18 +1586,18 @@ <h2 id="description">Description</h2>
 <div class="doc doc-object doc-class">
 
 
+
 <a id="src.pheval.runners.runner.PhEvalRunner"></a>
   <div class="doc doc-contents first">
-      <p class="doc doc-class-bases">
-        Bases: <code><span title="abc.ABC">ABC</span></code></p>
+          <p class="doc doc-class-bases">
+            Bases: <code><span title="abc.ABC">ABC</span></code></p>
 
   
       <p>PhEvalRunner Class</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 12</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/runners/runner.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal"> 12</span>
 <span class="normal"> 13</span>
 <span class="normal"> 14</span>
 <span class="normal"> 15</span>
@@ -1829,7 +1829,7 @@ <h2 id="description">Description</h2>
 <span class="w">        </span><span class="sd">&quot;&quot;&quot;Construct run output meta data&quot;&quot;&quot;</span>
         <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">meta_data</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -1849,6 +1849,7 @@ <h2 id="description">Description</h2>
 
   </div>
 
+
 </div><hr />
 <h2 id="step-by-step-plugin-development-process">Step-by-Step Plugin Development Process</h2>
 <p>The plugin structure is derived from a <a href="https://cookiecutter.readthedocs.io/en/stable/">cookiecutter</a> template, <a href="https://github.com/hrshdhgd/sphintoxetry-cookiecutter">Sphintoxetry-cookiecutter</a>, and it uses <a href="https://www.sphinx-doc.org/en/master/">Sphinx</a>, <a href="https://tox.wiki/en/latest/">tox</a> and <a href="https://python-poetry.org">poetry</a> as core dependencies.
@@ -1911,8 +1912,8 @@ <h3 id="3-implement-pheval-custom-runner">3. Implement PhEval Custom Runner</h3>
         <span class="nb">print</span><span class="p">(</span><span class="s2">&quot;post processing&quot;</span><span class="p">)</span>
 </code></pre></div>
 <h3 id="4-add-pheval-plugins-section-to-the-pyprojecttoml-file">4. Add PhEval Plugins section to the pyproject.toml file</h3>
-<div class="highlight"><pre><span></span><code><span class="k">[tool.poetry.plugins.&quot;pheval.plugins&quot;]</span>
-<span class="n">customrunner</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">&quot;pheval_plugin_example.runner:CustomPhevalRunner&quot;</span>
+<div class="highlight"><pre><span></span><code><span class="k">[tool.poetry.plugins.</span><span class="s2">&quot;pheval.plugins&quot;</span><span class="k">]</span>
+<span class="n">customrunner</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">&quot;pheval_plugin_example.runner:CustomPhevalRunner&quot;</span>
 </code></pre></div>
 <p><mark>Replace the value above with the path to your custom runner plugin</mark></p>
 <h3 id="5-implementing-pheval-helper-methods">5. Implementing PhEval helper methods</h3>
@@ -1925,16 +1926,16 @@ <h4 id="utility-methods">Utility methods</h4>
 <div class="doc doc-object doc-class">
 
 
+
 <a id="src.pheval.utils.phenopacket_utils.PhenopacketUtil"></a>
   <div class="doc doc-contents first">
 
   
       <p>Class for retrieving data from a Phenopacket or Family object</p>
 
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">219</span>
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/utils/phenopacket_utils.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">219</span>
 <span class="normal">220</span>
 <span class="normal">221</span>
 <span class="normal">222</span>
@@ -2566,7 +2567,7 @@ <h4 id="utility-methods">Utility methods</h4>
             <span class="k">return</span> <span class="kc">True</span>
         <span class="k">return</span> <span class="kc">False</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2586,6 +2587,7 @@ <h4 id="utility-methods">Utility methods</h4>
 
   </div>
 
+
 </div><hr />
 <p><code>PhenopacketUtil</code> proves particularly beneficial in scenarios where the tool for which you're crafting a runner implementation does not directly accept Phenopackets as inputs. Instead, it might require elements—such as HPO IDs— via the command-line interface (CLI). In this context, leveraging PhenopacketUtil within the runner's preparation phase enables the extraction of observed phenotypic features from the Phenopacket input, facilitating seamless processing.</p>
 <p>An example of how this could be implemented is outlined here:</p>
@@ -2668,68 +2670,26 @@ <h4 id="post-processing-methods">Post-processing methods</h4>
 <div class="doc doc-object doc-class">
 
 
+
 <a id="src.pheval.post_processing.post_processing.PhEvalGeneResult"></a>
   <div class="doc doc-contents first">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="../api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
-
-  
-      <p>Minimal data required from tool-specific output for gene prioritisation result</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>gene_symbol</code></td>
-          <td>
-                <code><span title="typing.Union">Union</span>[<span title="typing.List">List</span>[str], str]</code>
-          </td>
-          <td><p>The gene symbol(s) for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>gene_identifier</code></td>
-          <td>
-                <code><span title="typing.Union">Union</span>[<span title="typing.List">List</span>[str], str]</code>
-          </td>
-          <td><p>The ENSEMBL gene identifier(s) for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>score</code></td>
-          <td>
-                <code>float</code>
-          </td>
-          <td><p>The score for the gene result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>While we recommend providing the gene identifier in the ENSEMBL namespace,
-any matching format used in Phenopacket interpretations is acceptable for result matching purposes
-in the analysis.</p>
-</details>
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">30</span>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="../api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
+
+  
+      <p>Minimal data required from tool-specific output for gene prioritisation result
+Args:
+    gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry
+    gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry
+    score (float): The score for the gene result entry
+Notes:
+    While we recommend providing the gene identifier in the ENSEMBL namespace,
+    any matching format used in Phenopacket interpretations is acceptable for result matching purposes
+    in the analysis.</p>
+
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">30</span>
 <span class="normal">31</span>
 <span class="normal">32</span>
 <span class="normal">33</span>
@@ -2761,7 +2721,7 @@ <h4 id="post-processing-methods">Post-processing methods</h4>
     <span class="n">gene_identifier</span><span class="p">:</span> <span class="n">Union</span><span class="p">[</span><span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="nb">str</span><span class="p">]</span>
     <span class="n">score</span><span class="p">:</span> <span class="nb">float</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2781,104 +2741,38 @@ <h4 id="post-processing-methods">Post-processing methods</h4>
 
   </div>
 
+
 </div><hr />
 
 
 <div class="doc doc-object doc-class">
 
 
+
 <a id="src.pheval.post_processing.post_processing.PhEvalVariantResult"></a>
   <div class="doc doc-contents first">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="../api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
-
-  
-      <p>Minimal data required from tool-specific output for variant prioritisation</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>chromosome</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>The chromosome position of the variant recommended to be provided in the following format.</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>start</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The start position of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>end</code></td>
-          <td>
-                <code>int</code>
-          </td>
-          <td><p>The end position of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>ref</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>The reference allele of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>alt</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>The alternate allele of the variant</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>score</code></td>
-          <td>
-                <code>float</code>
-          </td>
-          <td><p>The score for the variant result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>While we recommend providing the variant's chromosome in the specified format,
-any matching format used in Phenopacket interpretations is acceptable for result matching purposes
-in the analysis.</p>
-</details>
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">75</span>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="../api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
+
+  
+      <p>Minimal data required from tool-specific output for variant prioritisation
+Args:
+    chromosome (str): The chromosome position of the variant recommended to be provided in the following format.
+    This includes numerical designations from 1 to 22 representing autosomal chromosomes,
+    as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT.
+    start (int): The start position of the variant
+    end (int): The end position of the variant
+    ref (str): The reference allele of the variant
+    alt (str): The alternate allele of the variant
+    score (float): The score for the variant result entry
+Notes:
+    While we recommend providing the variant's chromosome in the specified format,
+    any matching format used in Phenopacket interpretations is acceptable for result matching purposes
+    in the analysis.</p>
+
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">75</span>
 <span class="normal">76</span>
 <span class="normal">77</span>
 <span class="normal">78</span>
@@ -2926,7 +2820,7 @@ <h4 id="post-processing-methods">Post-processing methods</h4>
     <span class="n">alt</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">score</span><span class="p">:</span> <span class="nb">float</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -2946,74 +2840,33 @@ <h4 id="post-processing-methods">Post-processing methods</h4>
 
   </div>
 
+
 </div><hr />
 
 
 <div class="doc doc-object doc-class">
 
 
+
 <a id="src.pheval.post_processing.post_processing.PhEvalDiseaseResult"></a>
   <div class="doc doc-contents first">
-      <p class="doc doc-class-bases">
-        Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="../api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
-
-  
-      <p>Minimal data required from tool-specific output for disease prioritisation</p>
-
-  <p><strong>Parameters:</strong></p>
-  <table>
-    <thead>
-      <tr>
-        <th>Name</th>
-        <th>Type</th>
-        <th>Description</th>
-        <th>Default</th>
-      </tr>
-    </thead>
-    <tbody>
-        <tr>
-          <td><code>disease_name</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>Disease name for the result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>disease_identifier</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>Identifier for the disease result entry in the OMIM namespace</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-        <tr>
-          <td><code>score</code></td>
-          <td>
-                <code>str</code>
-          </td>
-          <td><p>Score for the disease result entry</p></td>
-          <td>
-              <em>required</em>
-          </td>
-        </tr>
-    </tbody>
-  </table>
-
-<details class="notes">
-  <summary>Notes</summary>
-  <p>While we recommend providing the disease identifier in the OMIM namespace,
-any matching format used in Phenopacket interpretations is acceptable for result matching purposes
-in the analysis.</p>
-</details>
-
-        <details class="quote">
-          <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
-          <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">131</span>
+          <p class="doc doc-class-bases">
+            Bases: <code><a class="autorefs autorefs-internal" title="src.pheval.post_processing.post_processing.PhEvalResult" href="../api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalResult">PhEvalResult</a></code></p>
+
+  
+      <p>Minimal data required from tool-specific output for disease prioritisation
+Args:
+    disease_name (str): Disease name for the result entry
+    disease_identifier (str): Identifier for the disease result entry in the OMIM namespace
+    score (str): Score for the disease result entry
+Notes:
+    While we recommend providing the disease identifier in the OMIM namespace,
+    any matching format used in Phenopacket interpretations is acceptable for result matching purposes
+    in the analysis.</p>
+
+            <details class="quote">
+              <summary>Source code in <code>src/pheval/post_processing/post_processing.py</code></summary>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">131</span>
 <span class="normal">132</span>
 <span class="normal">133</span>
 <span class="normal">134</span>
@@ -3045,7 +2898,7 @@ <h4 id="post-processing-methods">Post-processing methods</h4>
     <span class="n">disease_identifier</span><span class="p">:</span> <span class="nb">str</span>
     <span class="n">score</span><span class="p">:</span> <span class="nb">float</span>
 </code></pre></div></td></tr></table></div>
-        </details>
+            </details>
 
   
 
@@ -3065,6 +2918,7 @@ <h4 id="post-processing-methods">Post-processing methods</h4>
 
   </div>
 
+
 </div><hr />
 <p>The <code>generate_pheval_result()</code> can be implemented in your runner to write out the PhEval TSV results.</p>
 <p>An example of how the method can be called is outlined here:</p>
diff --git a/exomiser_pipeline/index.html b/exomiser_pipeline/index.html
index fb8f90e87..d26cfac0e 100644
--- a/exomiser_pipeline/index.html
+++ b/exomiser_pipeline/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/exomiser_pipeline/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/index.html b/index.html
index 4c642cc3e..538a47cfb 100644
--- a/index.html
+++ b/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/">
       
       <link rel="icon" href="assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/objects.inv b/objects.inv
index 0238e54af..4a8fd24fb 100644
Binary files a/objects.inv and b/objects.inv differ
diff --git a/pipeline/index.html b/pipeline/index.html
index 22d6f7440..b0938d189 100644
--- a/pipeline/index.html
+++ b/pipeline/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/pipeline/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/plugins/index.html b/plugins/index.html
index 4305c42df..53b653059 100644
--- a/plugins/index.html
+++ b/plugins/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/plugins/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/roadmap/index.html b/roadmap/index.html
index 2c5028df0..01d936543 100644
--- a/roadmap/index.html
+++ b/roadmap/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/roadmap/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">
     
     
       
diff --git a/search/search_index.json b/search/search_index.json
index 21d03cf24..f23ff7bde 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Home Introduction PhEval - Phenotypic Inference Evaluation Framework PhEval: Tool-specific processing (VP pipeline) flowchart LR PC-->DP PC[(Phenopackets Corpus)] SSSOM[Semantic Similarity Profiles Mapping Commons]-->|OAK-SEMSIM|DP[Data Prepare] KG[Source data KG - Monarch KG]-->|KGX-BIOLINK|DP[Data Prepare] ONT[Ontologies - Phenio]-->|OAK-ONTO|DP[Data Prepare] DP-->RP[Run Prepare] RP-->PR[PhEval Runner] PR-->DP2[Data Process] ER[Exomiser Runner]-->PR EDP[Exomiser Data Prepare]-->DP ERP[Exomiser Run Prepare]-->RP PPP[Disease-profile similarity prediction Post-process]-->DP2 PV[Phenotype/Variant]-->DP2 GVP[Gene VP Post-process]-->DP2 EPP[Exomiser Post Process]-->GVP GVP-->VPR[VP Report] Quick links: GitHub page","title":"Home"},{"location":"#home","text":"","title":"Home"},{"location":"#introduction","text":"PhEval - Phenotypic Inference Evaluation Framework","title":"Introduction"},{"location":"#pheval-tool-specific-processing-vp-pipeline","text":"flowchart LR PC-->DP PC[(Phenopackets Corpus)] SSSOM[Semantic Similarity Profiles Mapping Commons]-->|OAK-SEMSIM|DP[Data Prepare] KG[Source data KG - Monarch KG]-->|KGX-BIOLINK|DP[Data Prepare] ONT[Ontologies - Phenio]-->|OAK-ONTO|DP[Data Prepare] DP-->RP[Run Prepare] RP-->PR[PhEval Runner] PR-->DP2[Data Process] ER[Exomiser Runner]-->PR EDP[Exomiser Data Prepare]-->DP ERP[Exomiser Run Prepare]-->RP PPP[Disease-profile similarity prediction Post-process]-->DP2 PV[Phenotype/Variant]-->DP2 GVP[Gene VP Post-process]-->DP2 EPP[Exomiser Post Process]-->GVP GVP-->VPR[VP Report] Quick links: GitHub page","title":"PhEval: Tool-specific processing (VP pipeline)"},{"location":"CODE_OF_CONDUCT/","text":"Contributor Covenant Code of Conduct Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project 
and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. Our Standards Examples of behavior that contributes to creating a positive environment include: Using welcoming and inclusive language Being respectful of differing viewpoints and experiences Gracefully accepting constructive criticism Focusing on what is best for the community Showing empathy towards other community members Examples of unacceptable behavior by participants include: The use of sexualized language or imagery and unwelcome sexual attention or advances Trolling, insulting/derogatory comments, and personal or political attacks Public or private harassment Publishing others' private information, such as a physical or electronic address, without explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
Representation of a project may be further defined and clarified by project maintainers. Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. Attribution This code of conduct has been derived from the excellent code of conduct of the ATOM project which in turn is adapted from the Contributor Covenant , version 1.4, available at https://contributor-covenant.org/version/1/4","title":"Contributor Covenant Code of Conduct"},{"location":"CODE_OF_CONDUCT/#contributor-covenant-code-of-conduct","text":"","title":"Contributor Covenant Code of Conduct"},{"location":"CODE_OF_CONDUCT/#our-pledge","text":"In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.","title":"Our Pledge"},{"location":"CODE_OF_CONDUCT/#our-standards","text":"Examples of behavior that contributes to creating a positive environment include: Using welcoming and inclusive language Being respectful of differing viewpoints and experiences Gracefully accepting constructive criticism Focusing on what is best for the community Showing empathy towards other community members Examples of unacceptable behavior by participants include: The use of 
sexualized language or imagery and unwelcome sexual attention or advances Trolling, insulting/derogatory comments, and personal or political attacks Public or private harassment Publishing others' private information, such as a physical or electronic address, without explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting","title":"Our Standards"},{"location":"CODE_OF_CONDUCT/#our-responsibilities","text":"Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.","title":"Our Responsibilities"},{"location":"CODE_OF_CONDUCT/#scope","text":"This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.","title":"Scope"},{"location":"CODE_OF_CONDUCT/#enforcement","text":"Instances of abusive, harassing, or otherwise unacceptable behavior. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.","title":"Enforcement"},{"location":"CODE_OF_CONDUCT/#attribution","text":"This code of conduct has been derived from the excellent code of conduct of the ATOM project which in turn is adapted from the Contributor Covenant , version 1.4, available at https://contributor-covenant.org/version/1/4","title":"Attribution"},{"location":"about/","text":"PhEval - Phenotypic Inference Evaluation Framework Many variant prioritization tools (such as Exomiser and other computational approaches) rely on ontologies and phenotype matching, sometimes involving complex processes such as cross-species inference. The performance of such tools is exceedingly hard to evaluate because of the many factors involved: changes to the structure of the ontology, cross-species mappings, and semantic similarity algorithms can have significant consequences. Furthermore, the lack of suitable real-world problems/corpora leads to the situation that many algorithms are evaluated using simulations, which may fail to capture real-world scenarios. The lack of an evaluation framework that enables studying effects on data and knowledge inputs on real-world problems makes it difficult to optimize algorithms. To this end, we are developing a modular Phenotypic Inference Evaluation Framework (PhEval), which is delivered as a community resource.","title":"About"},{"location":"about/#pheval-phenotypic-inference-evaluation-framework","text":"Many variant prioritization tools (such as Exomiser and other computational approaches) rely on ontologies and phenotype matching, sometimes involving complex processes such as cross-species inference. 
The performance of such tools is exceedingly hard to evaluate because of the many factors involved: changes to the structure of the ontology, cross-species mappings, and semantic similarity algorithms can have significant consequences. Furthermore, the lack of suitable real-world problems/corpora leads to the situation that many algorithms are evaluated using simulations, which may fail to capture real-world scenarios. The lack of an evaluation framework that enables studying effects on data and knowledge inputs on real-world problems makes it difficult to optimize algorithms. To this end, we are developing a modular Phenotypic Inference Evaluation Framework (PhEval), which is delivered as a community resource.","title":"PhEval - Phenotypic Inference Evaluation Framework"},{"location":"contact/","text":"Contact The preferred way to contact the PhEval team is through the issue tracker (for problems with PhEval) or the GitHub discussions (for general questions). You can find any of the members of the PhEval core team on GitHub: https://github.com/orgs/monarch-initiative/teams/pheval-team Their GitHub profiles usually also provide email addresses.","title":"Contact Us"},{"location":"contact/#contact","text":"The preferred way to contact the PhEval team is through the issue tracker (for problems with PhEval) or the GitHub discussions (for general questions). You can find any of the members of the PhEval core team on GitHub: https://github.com/orgs/monarch-initiative/teams/pheval-team Their GitHub profiles usually also provide email addresses.","title":"Contact"},{"location":"contributing/","text":"Contributions First of all: Thank you for taking the time to contribute! The following is a set of guidelines for contributing to the PhEval framework. These guidelines are not strict rules. Use your best judgment, and feel free to propose changes to this document in a pull request. 
Table Of Contents Contributions Table Of Contents Code of Conduct Guidelines for Contributions and Requests Reporting problems with the data model Code of Conduct The monarch-technical-documentation team strives to create a welcoming environment for editors, users and other contributors. Please carefully read our Code of Conduct . Guidelines for Contributions and Requests Reporting problems with the data model Please use our Issue Tracker for reporting problems with the ontology.","title":"Contributions"},{"location":"contributing/#contributions","text":"First of all: Thank you for taking the time to contribute! The following is a set of guidelines for contributing to the PhEval framework. These guidelines are not strict rules. Use your best judgment, and feel free to propose changes to this document in a pull request.","title":"Contributions"},{"location":"contributing/#table-of-contents","text":"Contributions Table Of Contents Code of Conduct Guidelines for Contributions and Requests Reporting problems with the data model","title":"Table Of Contents"},{"location":"contributing/#code-of-conduct","text":"The monarch-technical-documentation team strives to create a welcoming environment for editors, users and other contributors. Please carefully read our Code of Conduct .","title":"Code of Conduct"},{"location":"contributing/#guidelines-for-contributions-and-requests","text":"","title":"Guidelines for Contributions and Requests"},{"location":"contributing/#reporting-problems-with-the-data-model","text":"Please use our Issue Tracker for reporting problems with the ontology.","title":"Reporting problems with the data model"},{"location":"developing_a_pheval_plugin/","text":"Developing a PhEval Plugin Description Plugin development allows PhEval to be extensible, as we have designed it. The plugin goal is to be flexible through custom runner implementations. This plugin development enhances the PhEval functionality. 
You can build one quickly using this step-by-step process. All custom Runners implementations must implement all PhevalRunner methods Bases: ABC PhEvalRunner Class Source code in src/pheval/runners/runner.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 @dataclass class PhEvalRunner ( ABC ): \"\"\"PhEvalRunner Class\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str directory_path = None input_dir_config = None _meta_data = None __raw_results_dir = \"raw_results/\" __pheval_gene_results_dir = \"pheval_gene_results/\" __pheval_variant_results_dir = \"pheval_variant_results/\" __pheval_disease_results_dir = \"pheval_disease_results/\" __tool_input_commands_dir = \"tool_input_commands/\" __run_meta_data_file = \"results.yml\" def __post_init__ ( self ): self . input_dir_config = parse_input_dir_config ( self . input_dir ) def _get_tool ( self ): return self . input_dir_config . tool def _get_variant_analysis ( self ): return self . input_dir_config . variant_analysis def _get_gene_analysis ( self ): return self . input_dir_config . gene_analysis def _get_disease_analysis ( self ): return self . input_dir_config . disease_analysis @property def tool_input_commands_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __tool_input_commands_dir ) @tool_input_commands_dir . setter def tool_input_commands_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def raw_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __raw_results_dir ) @raw_results_dir . setter def raw_results_dir ( self , directory_path ): self . 
directory_path = Path ( directory_path ) @property def pheval_gene_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_gene_results_dir ) @pheval_gene_results_dir . setter def pheval_gene_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_variant_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_variant_results_dir ) @pheval_variant_results_dir . setter def pheval_variant_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_disease_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_disease_results_dir ) @pheval_disease_results_dir . setter def pheval_disease_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True ) @property def meta_data ( self ): self . _meta_data = BasicOutputRunMetaData ( tool = self . input_dir_config . tool , tool_version = self . version , config = f \" { Path ( self . input_dir ) . parent . name } / { Path ( self . input_dir ) . name } \" , run_timestamp = datetime . now () . timestamp (), corpus = f \" { Path ( self . testdata_dir ) . parent . name } / { Path ( self . testdata_dir ) . name } \" , ) return self . _meta_data @meta_data . setter def meta_data ( self , meta_data ): self . 
_meta_data = meta_data @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" @abstractmethod def run ( self ): \"\"\"run\"\"\" @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data Step-by-Step Plugin Development Process The plugin structure is derived from a cookiecutter template, Sphintoxetry-cookiecutter , and it uses Sphinx , tox and poetry as core dependencies. This allows PhEval extensibility to be standardized in terms of documentation and dependency management. 1. Sphintoxetry-cookiecutter scaffold First, install the cruft package. Cruft enables keeping projects up-to-date with future updates made to this original template. Install the latest release of cruft from pip pip install cruft NOTE: You may encounter an error with the naming of the project layout if using an older release of cruft. To avoid this, make sure you have installed the latest release version. Next, create a project using the sphintoxetry-cookiecutter template. cruft create https://github.com/monarch-initiative/monarch-project-template 2. Further setup Install poetry if you haven't already. pip install poetry Install dependencies poetry install Add PhEval dependency poetry add pheval Run tox to see if the setup works poetry run tox 3. 
Implement PhEval Custom Runner The runner name is arbitrary and custom Runner name was chose by demonstrative purposes Create a runner file inside the plugin project, e.g: \"\"\"Custom Pheval Runner.\"\"\" from dataclasses import dataclass from pathlib import Path from pheval.runners.runner import PhEvalRunner @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" ) 4. Add PhEval Plugins section to the pyproject.toml file [tool.poetry.plugins.\"pheval.plugins\"] customrunner = \"pheval_plugin_example.runner:CustomPhevalRunner\" Replace the value above with the path to your custom runner plugin 5. Implementing PhEval helper methods Streamlining the creation of your custom PhEval runner can be facilitated by leveraging PhEval's versatile helper methods, where applicable. Within PhEval, numerous public methods have been designed to assist in your runner methods. The utilisation of these helper methods is optional, yet they are crafted to enhance the overall implementation process. Utility methods The PhenopacketUtil class is designed to aid in the collection of specific data from a Phenopacket. 
Class for retrieving data from a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 class PhenopacketUtil : \"\"\"Class for retrieving data from a Phenopacket or Family object\"\"\" def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . 
id else : return self . phenopacket_contents . subject . id def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . diseases def _diagnosis_from_interpretations ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the interpretations object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] interpretation = self . interpretations () for i in interpretation : ( diagnoses . append ( ProbandDisease ( disease_name = i . diagnosis . disease . label , disease_identifier = i . diagnosis . disease . 
id , ) ) if i . diagnosis . disease . label != \"\" and i . diagnosis . disease . id != \"\" else None ) return diagnoses def _diagnosis_from_disease ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the diseases object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] for disease in self . diseases (): diagnoses . append ( ProbandDisease ( disease_name = disease . term . label , disease_identifier = disease . term . id ) ) return diagnoses def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ())) def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . interpretations def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . 
append ( variant_data ) return all_variants def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . 
name )) return vcf_data @staticmethod def _extract_diagnosed_gene ( genomic_interpretation : GenomicInterpretation , ) -> ProbandCausativeGene : \"\"\" Retrieve the disease causing genes from the variant descriptor field if not empty, otherwise, retrieves from the gene descriptor from a phenopacket. Args: genomic_interpretation (GenomicInterpretation): A genomic interpretation from a Phenopacket Returns: ProbandCausativeGene: The disease causing gene \"\"\" if genomic_interpretation . variant_interpretation . ByteSize () != 0 : return ProbandCausativeGene ( genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . symbol , genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . value_id , ) else : return ProbandCausativeGene ( gene_symbol = genomic_interpretation . gene . symbol , gene_identifier = genomic_interpretation . gene . value_id , ) def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . 
variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False PhenopacketUtil proves particularly beneficial in scenarios where the tool for which you're crafting a runner implementation does not directly accept Phenopackets as inputs. 
Instead, it might require elements\u2014such as HPO IDs\u2014 via the command-line interface (CLI). In this context, leveraging PhenopacketUtil within the runner's preparation phase enables the extraction of observed phenotypic features from the Phenopacket input, facilitating seamless processing. An example of how this could be implemented is outlined here: from pheval.utils.phenopacket_utils import phenopacket_reader from pheval.utils.phenopacket_utils import PhenopacketUtil phenopacket = phenopacket_reader ( \"/path/to/phenopacket.json\" ) phenopacket_util = PhenopacketUtil ( phenopacket ) # To return a list of all observed phenotypes for a phenopacket observed_phenotypes = phenopacket_util . observed_phenotypic_features () # To extract just the HPO ID as a list observed_phenotypes_hpo_ids = [ observed_phenotype . id for observed_phenotype in observed_phenotypes ] Additional tool-specific configurations For the pheval run command to execute successfully, a config.yaml should be found within the input directory supplied on the CLI. tool : tool_version : variant_analysis : gene_analysis : disease_analysis : tool_specific_configuration_options : The tool_specific_configuration_options is an optional field that can be populated with any variables specific to your runner implementation that is required for the running of your tool. All other fields are required to be filled in. The variant_analysis , gene_analysis , and disease_analysis are set as booleans and are for specifying what type of analysis/prioritisation the tool outputs. To populate the tool_specific_configurations_options with customised data, we suggest using the pydantic package as it can easily parse the data from the yaml structure. 
e.g., Define a BaseModel class with the fields that will populate the tool_specific_configuration_options from pydantic import BaseModel , Field class CustomisedConfigurations ( BaseModel ): \"\"\" Class for defining the customised configurations in tool_specific_configurations field, within the input_dir config.yaml Args: environment (str): Environment to run \"\"\" environment : str = Field ( ... ) Within your runner parse the field into an object. from dataclasses import dataclass from pheval.runners.runner import PhEvalRunner from pathlib import Path @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) config = CustomisedConfigurations . parse_obj ( self . input_dir_config . tool_specific_configuration_options ) environment = config . environment def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" ) Post-processing methods PhEval currently supports the benchmarking of gene, variant, and disease prioritisation results. To benchmark these result types, PhEval TSV result files need to be generated. PhEval can deal with the ranking and generation of these files to the correct location. However, the runner implementation must handle the extraction of essential data from the tool-specific raw results. This involves transforming them into a list comprising PhEval data classes, with each instance representing a result entry. 
The dataclasses representing essential information extracted from tool-specific output for gene, variant, and disease prioritisation are defined as follows: Bases: PhEvalResult Minimal data required from tool-specific output for gene prioritisation result Parameters: Name Type Description Default gene_symbol Union [ List [ str ], str ] The gene symbol(s) for the result entry required gene_identifier Union [ List [ str ], str ] The ENSEMBL gene identifier(s) for the result entry required score float The score for the gene result entry required Notes While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 @dataclass class PhEvalGeneResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" gene_symbol : Union [ List [ str ], str ] gene_identifier : Union [ List [ str ], str ] score : float Bases: PhEvalResult Minimal data required from tool-specific output for variant prioritisation Parameters: Name Type Description Default chromosome str The chromosome position of the variant recommended to be provided in the following format. 
required start int The start position of the variant required end int The end position of the variant required ref str The reference allele of the variant required alt str The alternate allele of the variant required score float The score for the variant result entry required Notes While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 @dataclass class PhEvalVariantResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
\"\"\" chromosome : str start : int end : int ref : str alt : str score : float Bases: PhEvalResult Minimal data required from tool-specific output for disease prioritisation Parameters: Name Type Description Default disease_name str Disease name for the result entry required disease_identifier str Identifier for the disease result entry in the OMIM namespace required score str Score for the disease result entry required Notes While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 @dataclass class PhEvalDiseaseResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str score : float The generate_pheval_result() can be implemented in your runner to write out the PhEval TSV results. 
An example of how the method can be called is outlined here: from pheval.post_processing.post_processing import generate_pheval_result generate_pheval_result ( pheval_result = pheval_gene_result , # this is the list of extracted PhEval result requirements sort_order_str = \"descending\" , # or can be ascending - this determines in which order the scores will be ranked output_dir = output_directory , # this can be accessed from the runner instance e.g., self.output_dir tool_result_path = tool_result_json # this is the path to the tool-specific raw results file ) Adding metadata to the results.yml By default, PhEval will write a results.yml to the output directory supplied on the CLI. The results.yml contains basic metadata regarding the run configuration, however, there is also the option to add customised run metadata to the results.yml in the tool_specific_configuration_options field. To achieve this, you'll need to create a construct_meta_data() method within your runner implementation. This method is responsible for appending customised metadata to the metadata object in the form of a defined dataclass. It should return the entire metadata object once the addition is completed. e.g., Defined customised metadata dataclass: from dataclasses import dataclass @dataclass class CustomisedMetaData : customised_field : str Example of implementation in the runner. 
from dataclasses import dataclass from pheval.runners.runner import PhEvalRunner from pathlib import Path @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" ) def construct_meta_data ( self ): \"\"\"Add metadata.\"\"\" self . meta_data . tool_specific_configuration_options = CustomisedMetaData ( customised_field = \"customised_value\" ) return self . meta_data 6. Test it. To update your custom pheval runner implementation, you must first install the package poetry install Now you should be able to run PhEval, passing your custom runner as a parameter. e.g., pheval run -i ./input_dir -t ./test_data_dir -r 'customphevalrunner' -o output_dir The -r parameter stands for your plugin runner class name, and it must be entirely lowercase. Output: preparing running with custom pheval Runner post processing Pay attention to the \" running with custom pheval Runner \" line; this is exactly what we implemented in the CustomPhevalRunner example","title":"Developing a PhEval Plugin"},{"location":"developing_a_pheval_plugin/#developing-a-pheval-plugin","text":"","title":"Developing a PhEval Plugin"},{"location":"developing_a_pheval_plugin/#description","text":"Plugin development allows PhEval to be extensible, as we have designed it. The plugin goal is to be flexible through custom runner implementations. This plugin development enhances the PhEval functionality. You can build one quickly using this step-by-step process. 
All custom Runners implementations must implement all PhevalRunner methods Bases: ABC PhEvalRunner Class Source code in src/pheval/runners/runner.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 @dataclass class PhEvalRunner ( ABC ): \"\"\"PhEvalRunner Class\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str directory_path = None input_dir_config = None _meta_data = None __raw_results_dir = \"raw_results/\" __pheval_gene_results_dir = \"pheval_gene_results/\" __pheval_variant_results_dir = \"pheval_variant_results/\" __pheval_disease_results_dir = \"pheval_disease_results/\" __tool_input_commands_dir = \"tool_input_commands/\" __run_meta_data_file = \"results.yml\" def __post_init__ ( self ): self . input_dir_config = parse_input_dir_config ( self . input_dir ) def _get_tool ( self ): return self . input_dir_config . tool def _get_variant_analysis ( self ): return self . input_dir_config . variant_analysis def _get_gene_analysis ( self ): return self . input_dir_config . gene_analysis def _get_disease_analysis ( self ): return self . input_dir_config . disease_analysis @property def tool_input_commands_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __tool_input_commands_dir ) @tool_input_commands_dir . setter def tool_input_commands_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def raw_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __raw_results_dir ) @raw_results_dir . setter def raw_results_dir ( self , directory_path ): self . 
directory_path = Path ( directory_path ) @property def pheval_gene_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_gene_results_dir ) @pheval_gene_results_dir . setter def pheval_gene_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_variant_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_variant_results_dir ) @pheval_variant_results_dir . setter def pheval_variant_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_disease_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_disease_results_dir ) @pheval_disease_results_dir . setter def pheval_disease_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True ) @property def meta_data ( self ): self . _meta_data = BasicOutputRunMetaData ( tool = self . input_dir_config . tool , tool_version = self . version , config = f \" { Path ( self . input_dir ) . parent . name } / { Path ( self . input_dir ) . name } \" , run_timestamp = datetime . now () . timestamp (), corpus = f \" { Path ( self . testdata_dir ) . parent . name } / { Path ( self . testdata_dir ) . name } \" , ) return self . _meta_data @meta_data . setter def meta_data ( self , meta_data ): self . 
_meta_data = meta_data @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" @abstractmethod def run ( self ): \"\"\"run\"\"\" @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data","title":"Description"},{"location":"developing_a_pheval_plugin/#step-by-step-plugin-development-process","text":"The plugin structure is derived from a cookiecutter template, Sphintoxetry-cookiecutter , and it uses Sphinx , tox and poetry as core dependencies. This allows PhEval extensibility to be standardized in terms of documentation and dependency management.","title":"Step-by-Step Plugin Development Process"},{"location":"developing_a_pheval_plugin/#1-sphintoxetry-cookiecutter-scaffold","text":"First, install the cruft package. Cruft enables keeping projects up-to-date with future updates made to this original template. Install the latest release of cruft from pip pip install cruft NOTE: You may encounter an error with the naming of the project layout if using an older release of cruft. To avoid this, make sure you have installed the latest release version. Next, create a project using the sphintoxetry-cookiecutter template. cruft create https://github.com/monarch-initiative/monarch-project-template","title":"1. Sphintoxetry-cookiecutter scaffold"},{"location":"developing_a_pheval_plugin/#2-further-setup","text":"","title":"2. 
Further setup"},{"location":"developing_a_pheval_plugin/#install-poetry-if-you-havent-already","text":"pip install poetry","title":"Install poetry if you haven't already."},{"location":"developing_a_pheval_plugin/#install-dependencies","text":"poetry install","title":"Install dependencies"},{"location":"developing_a_pheval_plugin/#add-pheval-dependency","text":"poetry add pheval","title":"Add PhEval dependency"},{"location":"developing_a_pheval_plugin/#run-tox-to-see-if-the-setup-works","text":"poetry run tox","title":"Run tox to see if the setup works"},{"location":"developing_a_pheval_plugin/#3-implement-pheval-custom-runner","text":"The runner name is arbitrary; the custom runner name was chosen for demonstrative purposes. Create a runner file inside the plugin project, e.g.: \"\"\"Custom Pheval Runner.\"\"\" from dataclasses import dataclass from pathlib import Path from pheval.runners.runner import PhEvalRunner @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" )","title":"3. Implement PhEval Custom Runner"},{"location":"developing_a_pheval_plugin/#4-add-pheval-plugins-section-to-the-pyprojecttoml-file","text":"[tool.poetry.plugins.\"pheval.plugins\"] customrunner = \"pheval_plugin_example.runner:CustomPhevalRunner\" Replace the value above with the path to your custom runner plugin","title":"4. Add PhEval Plugins section to the pyproject.toml file"},{"location":"developing_a_pheval_plugin/#5-implementing-pheval-helper-methods","text":"Streamlining the creation of your custom PhEval runner can be facilitated by leveraging PhEval's versatile helper methods, where applicable. 
Within PhEval, numerous public methods have been designed to assist in your runner methods. The utilisation of these helper methods is optional, yet they are crafted to enhance the overall implementation process.","title":"5. Implementing PhEval helper methods"},{"location":"developing_a_pheval_plugin/#utility-methods","text":"The PhenopacketUtil class is designed to aid in the collection of specific data from a Phenopacket. Class for retrieving data from a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 class PhenopacketUtil : \"\"\"Class for retrieving data from a Phenopacket or Family object\"\"\" def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): 
\"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . 
diseases def _diagnosis_from_interpretations ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the interpretations object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] interpretation = self . interpretations () for i in interpretation : ( diagnoses . append ( ProbandDisease ( disease_name = i . diagnosis . disease . label , disease_identifier = i . diagnosis . disease . id , ) ) if i . diagnosis . disease . label != \"\" and i . diagnosis . disease . id != \"\" else None ) return diagnoses def _diagnosis_from_disease ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the diseases object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] for disease in self . diseases (): diagnoses . append ( ProbandDisease ( disease_name = disease . term . label , disease_identifier = disease . term . id ) ) return diagnoses def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ())) def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . interpretations def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . 
interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . 
uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . name )) return vcf_data @staticmethod def _extract_diagnosed_gene ( genomic_interpretation : GenomicInterpretation , ) -> ProbandCausativeGene : \"\"\" Retrieve the disease causing genes from the variant descriptor field if not empty, otherwise, retrieves from the gene descriptor from a phenopacket. Args: genomic_interpretation (GenomicInterpretation): A genomic interpretation from a Phenopacket Returns: ProbandCausativeGene: The disease causing gene \"\"\" if genomic_interpretation . variant_interpretation . ByteSize () != 0 : return ProbandCausativeGene ( genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . symbol , genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . value_id , ) else : return ProbandCausativeGene ( gene_symbol = genomic_interpretation . gene . symbol , gene_identifier = genomic_interpretation . gene . value_id , ) def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . 
interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. 
Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False PhenopacketUtil proves particularly beneficial in scenarios where the tool for which you're crafting a runner implementation does not directly accept Phenopackets as inputs. Instead, it might require elements\u2014such as HPO IDs\u2014 via the command-line interface (CLI). In this context, leveraging PhenopacketUtil within the runner's preparation phase enables the extraction of observed phenotypic features from the Phenopacket input, facilitating seamless processing. An example of how this could be implemented is outlined here: from pheval.utils.phenopacket_utils import phenopacket_reader from pheval.utils.phenopacket_utils import PhenopacketUtil phenopacket = phenopacket_reader ( \"/path/to/phenopacket.json\" ) phenopacket_util = PhenopacketUtil ( phenopacket ) # To return a list of all observed phenotypes for a phenopacket observed_phenotypes = phenopacket_util . observed_phenotypic_features () # To extract just the HPO ID as a list observed_phenotypes_hpo_ids = [ observed_phenotype . id for observed_phenotype in observed_phenotypes ]","title":"Utility methods"},{"location":"developing_a_pheval_plugin/#additional-tool-specific-configurations","text":"For the pheval run command to execute successfully, a config.yaml should be found within the input directory supplied on the CLI. tool : tool_version : variant_analysis : gene_analysis : disease_analysis : tool_specific_configuration_options : The tool_specific_configuration_options is an optional field that can be populated with any variables specific to your runner implementation that is required for the running of your tool. All other fields are required to be filled in. The variant_analysis , gene_analysis , and disease_analysis are set as booleans and are for specifying what type of analysis/prioritisation the tool outputs. 
To populate the tool_specific_configuration_options with customised data, we suggest using the pydantic package as it can easily parse the data from the yaml structure. e.g., Define a BaseModel class with the fields that will populate the tool_specific_configuration_options from pydantic import BaseModel , Field class CustomisedConfigurations ( BaseModel ): \"\"\" Class for defining the customised configurations in tool_specific_configurations field, within the input_dir config.yaml Args: environment (str): Environment to run \"\"\" environment : str = Field ( ... ) Within your runner, parse the field into an object. from dataclasses import dataclass from pheval.runners.runner import PhEvalRunner from pathlib import Path @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) config = CustomisedConfigurations . parse_obj ( self . input_dir_config . tool_specific_configuration_options ) environment = config . environment def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" )","title":"Additional tool-specific configurations"},{"location":"developing_a_pheval_plugin/#post-processing-methods","text":"PhEval currently supports the benchmarking of gene, variant, and disease prioritisation results. To benchmark these result types, PhEval TSV result files need to be generated. PhEval can deal with the ranking and generation of these files to the correct location. However, the runner implementation must handle the extraction of essential data from the tool-specific raw results. This involves transforming them into a list comprising PhEval data classes, with each instance representing a result entry. 
The dataclasses representing essential information extracted from tool-specific output for gene, variant, and disease prioritisation are defined as follows: Bases: PhEvalResult Minimal data required from tool-specific output for gene prioritisation result Parameters: Name Type Description Default gene_symbol Union [ List [ str ], str ] The gene symbol(s) for the result entry required gene_identifier Union [ List [ str ], str ] The ENSEMBL gene identifier(s) for the result entry required score float The score for the gene result entry required Notes While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 @dataclass class PhEvalGeneResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" gene_symbol : Union [ List [ str ], str ] gene_identifier : Union [ List [ str ], str ] score : float Bases: PhEvalResult Minimal data required from tool-specific output for variant prioritisation Parameters: Name Type Description Default chromosome str The chromosome position of the variant recommended to be provided in the following format. 
required start int The start position of the variant required end int The end position of the variant required ref str The reference allele of the variant required alt str The alternate allele of the variant required score float The score for the variant result entry required Notes While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 @dataclass class PhEvalVariantResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
\"\"\" chromosome : str start : int end : int ref : str alt : str score : float Bases: PhEvalResult Minimal data required from tool-specific output for disease prioritisation Parameters: Name Type Description Default disease_name str Disease name for the result entry required disease_identifier str Identifier for the disease result entry in the OMIM namespace required score str Score for the disease result entry required Notes While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 @dataclass class PhEvalDiseaseResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str score : float The generate_pheval_result() can be implemented in your runner to write out the PhEval TSV results. 
An example of how the method can be called is outlined here: from pheval.post_processing.post_processing import generate_pheval_result generate_pheval_result ( pheval_result = pheval_gene_result , # this is the list of extracted PhEval result requirements sort_order_str = \"descending\" , # or can be ascending - this determines in which order the scores will be ranked output_dir = output_directory , # this can be accessed from the runner instance e.g., self.output_dir tool_result_path = tool_result_json # this is the path to the tool-specific raw results file )","title":"Post-processing methods"},{"location":"developing_a_pheval_plugin/#adding-metadata-to-the-resultsyml","text":"By default, PhEval will write a results.yml to the output directory supplied on the CLI. The results.yml contains basic metadata regarding the run configuration, however, there is also the option to add customised run metadata to the results.yml in the tool_specific_configuration_options field. To achieve this, you'll need to create a construct_meta_data() method within your runner implementation. This method is responsible for appending customised metadata to the metadata object in the form of a defined dataclass. It should return the entire metadata object once the addition is completed. e.g., Defined customised metadata dataclass: from dataclasses import dataclass @dataclass class CustomisedMetaData : customised_field : str Example of implementation in the runner. 
from dataclasses import dataclass from pheval.runners.runner import PhEvalRunner from pathlib import Path @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" ) def construct_meta_data ( self ): \"\"\"Add metadata.\"\"\" self . meta_data . tool_specific_configuration_options = CustomisedMetaData ( customised_field = \"customised_value\" ) return self . meta_data","title":"Adding metadata to the results.yml"},{"location":"developing_a_pheval_plugin/#6-test-it","text":"To update your custom pheval runner implementation, you must first install the package poetry install Now you should be able to run PhEval, passing your custom runner as a parameter. e.g., pheval run -i ./input_dir -t ./test_data_dir -r 'customphevalrunner' -o output_dir The -r parameter stands for your plugin runner class name, and it must be entirely lowercase. Output: preparing running with custom pheval Runner post processing Pay attention to \" running with custom pheval Runner \" line, this is exactly what we had implemented in the CustomPhevalRunner Example","title":"6. Test it."},{"location":"exomiser_pipeline/","text":"PhEval Pipeline Exomiser Runner Step by Step to PhEval Run Pipeline (with ExomiserRunner) 1. Download Exomiser Software wget https://github.com/exomiser/Exomiser/releases/download/13.2.0/exomiser-cli-13.2.0-distribution.zip 2. Download Phenotype Data wget https://data.monarchinitiative.org/exomiser/latest/2302_hg19.zip wget https://data.monarchinitiative.org/exomiser/latest/2302_hg38.zip wget https://data.monarchinitiative.org/exomiser/latest/2302_phenotype.zip 3. 
Unzip data # unzip the distribution and data files - this will create a directory called 'exomiser-cli-13.2.0' in the current working directory unzip exomiser-cli-13.2.0-distribution.zip unzip 2302_hg19.zip -d exomiser-cli-13.2.0/data unzip 2302_hg38.zip -d exomiser-cli-13.2.0/data 4. Clone PhEval repo and follow steps described in Pipeline Documentation: git clone https://github.com/monarch-initiative/pheval.git cd pheval poetry shell poetry install pip install pheval.exomiser 5. Set PhEval Config YAML File directories : tmp : data/tmp exomiser : /path_where_exomiser_was_extracted phenotype : /path_where_phenotype_was_extracted workspace : /pheval's_path # path where pheval was cloned corpora : - id : small_test scrambled : - factor : 0.5 - factor : 0.7 custom_variants : - id : no_phenotype configs : - tool : exomiser version : 13.2.0 configuration : default exomiser_db : semsim1 runs : - tool : exomiser configuration : default corpus : small_test corpusvariant : scrambled-0.5 version : 13.2.0 6. Generate Makefile based on configuration bash ./resources/generatemakefile.sh 7. Exomiser Runner requires the following configuration The config.yaml file should be formatted like the example below and must be placed in exomiser: /path_where_exomiser_was_extracted declared in pheval-config.yaml file. tool : exomiser tool_version : 13.2.0 variant_analysis : True gene_analysis : True disease_analysis : True tool_specific_configuration_options : environment : local exomiser_software_directory : . analysis_configuration_file : preset-exome-analysis.yml max_jobs : 0 application_properties : remm_version : cadd_version : hg19_data_version : 2302 hg19_local_frequency_path : hg38_data_version : 2302 phenotype_data_version : 2302 cache_type : cache_caffeine_spec : post_process : score_name : combinedScore sort_order : DESCENDING 8. 
Preset Exome Analysis File Exomiser requires a preset-exome-analysis.yml file saved at /path_where_exomiser_was_extracted/preset-exome-analysis.yml This is an example of preset-exome-analysis.yml file ## Exomiser Analysis Template. # These are all the possible options for running exomiser. Use this as a template for # your own set-up. --- analysisMode : PASS_ONLY inheritanceModes : { AUTOSOMAL_DOMINANT : 0.1 , AUTOSOMAL_RECESSIVE_HOM_ALT : 0.1 , AUTOSOMAL_RECESSIVE_COMP_HET : 2.0 , X_DOMINANT : 0.1 , X_RECESSIVE_HOM_ALT : 0.1 , X_RECESSIVE_COMP_HET : 2.0 , MITOCHONDRIAL : 0.2 } frequencySources : [ THOUSAND_GENOMES , TOPMED , UK10K , ESP_AFRICAN_AMERICAN , ESP_EUROPEAN_AMERICAN , ESP_ALL , EXAC_AFRICAN_INC_AFRICAN_AMERICAN , EXAC_AMERICAN , EXAC_SOUTH_ASIAN , EXAC_EAST_ASIAN , EXAC_FINNISH , EXAC_NON_FINNISH_EUROPEAN , EXAC_OTHER , GNOMAD_E_AFR , GNOMAD_E_AMR , # GNOMAD_E_ASJ, GNOMAD_E_EAS , GNOMAD_E_FIN , GNOMAD_E_NFE , GNOMAD_E_OTH , GNOMAD_E_SAS , GNOMAD_G_AFR , GNOMAD_G_AMR , # GNOMAD_G_ASJ, GNOMAD_G_EAS , GNOMAD_G_FIN , GNOMAD_G_NFE , GNOMAD_G_OTH , GNOMAD_G_SAS ] # Possible pathogenicitySources: (POLYPHEN, MUTATION_TASTER, SIFT), (REVEL, MVP), CADD, REMM # REMM is trained on non-coding regulatory regions # *WARNING* if you enable CADD or REMM ensure that you have downloaded and installed the CADD/REMM tabix files # and updated their location in the application.properties. Exomiser will not run without this. pathogenicitySources : [ REVEL , MVP ] #this is the standard exomiser order. 
steps : [ failedVariantFilter : { }, variantEffectFilter : { remove : [ FIVE_PRIME_UTR_EXON_VARIANT , FIVE_PRIME_UTR_INTRON_VARIANT , THREE_PRIME_UTR_EXON_VARIANT , THREE_PRIME_UTR_INTRON_VARIANT , NON_CODING_TRANSCRIPT_EXON_VARIANT , NON_CODING_TRANSCRIPT_INTRON_VARIANT , CODING_TRANSCRIPT_INTRON_VARIANT , UPSTREAM_GENE_VARIANT , DOWNSTREAM_GENE_VARIANT , INTERGENIC_VARIANT , REGULATORY_REGION_VARIANT ] }, frequencyFilter : { maxFrequency : 2.0 }, pathogenicityFilter : { keepNonPathogenic : true }, inheritanceFilter : { }, omimPrioritiser : { }, hiPhivePrioritiser : { } ] 9. PhEval Run make pheval run","title":"PhEval Pipeline Exomiser Runner"},{"location":"exomiser_pipeline/#pheval-pipeline-exomiser-runner","text":"","title":"PhEval Pipeline Exomiser Runner"},{"location":"exomiser_pipeline/#step-by-step-to-pheval-run-pipeline-with-exomiserrunner","text":"","title":"Step by Step to PhEval Run Pipeline (with ExomiserRunner)"},{"location":"exomiser_pipeline/#1-download-exomiser-software","text":"wget https://github.com/exomiser/Exomiser/releases/download/13.2.0/exomiser-cli-13.2.0-distribution.zip","title":"1. Download Exomiser Software"},{"location":"exomiser_pipeline/#2-download-phenotype-data","text":"wget https://data.monarchinitiative.org/exomiser/latest/2302_hg19.zip wget https://data.monarchinitiative.org/exomiser/latest/2302_hg38.zip wget https://data.monarchinitiative.org/exomiser/latest/2302_phenotype.zip","title":"2. Download Phenotype Data"},{"location":"exomiser_pipeline/#3-unzip-data","text":"# unzip the distribution and data files - this will create a directory called 'exomiser-cli-13.2.0' in the current working directory unzip exomiser-cli-13.2.0-distribution.zip unzip 2302_hg19.zip -d exomiser-cli-13.2.0/data unzip 2302_hg38.zip -d exomiser-cli-13.2.0/data","title":"3. 
Unzip data"},{"location":"exomiser_pipeline/#4-clone-pheval-repo-and-follow-steps-described-in-pipeline-documentation","text":"git clone https://github.com/monarch-initiative/pheval.git cd pheval poetry shell poetry install pip install pheval.exomiser","title":"4. Clone PhEval repo and follow steps described in Pipeline Documentation:"},{"location":"exomiser_pipeline/#5-set-pheval-config-yaml-file","text":"directories : tmp : data/tmp exomiser : /path_where_exomiser_was_extracted phenotype : /path_where_phenotype_was_extracted workspace : /pheval's_path # path where pheval was cloned corpora : - id : small_test scrambled : - factor : 0.5 - factor : 0.7 custom_variants : - id : no_phenotype configs : - tool : exomiser version : 13.2.0 configuration : default exomiser_db : semsim1 runs : - tool : exomiser configuration : default corpus : small_test corpusvariant : scrambled-0.5 version : 13.2.0","title":"5. Set PhEval Config YAML File"},{"location":"exomiser_pipeline/#6-generate-makefile-based-on-configuration","text":"bash ./resources/generatemakefile.sh","title":"6. Generate Makefile based on configuration"},{"location":"exomiser_pipeline/#7-exomiser-runner-requires-the-following-configuration","text":"The config.yaml file should be formatted like the example below and must be placed in exomiser: /path_where_exomiser_was_extracted declared in pheval-config.yaml file. tool : exomiser tool_version : 13.2.0 variant_analysis : True gene_analysis : True disease_analysis : True tool_specific_configuration_options : environment : local exomiser_software_directory : . analysis_configuration_file : preset-exome-analysis.yml max_jobs : 0 application_properties : remm_version : cadd_version : hg19_data_version : 2302 hg19_local_frequency_path : hg38_data_version : 2302 phenotype_data_version : 2302 cache_type : cache_caffeine_spec : post_process : score_name : combinedScore sort_order : DESCENDING","title":"7. 
Exomiser Runner requires the following configuration"},{"location":"exomiser_pipeline/#8-preset-exome-analysis-file","text":"Exomiser requires a preset-exome-analysis.yml file saved at /path_where_exomiser_was_extracted/preset-exome-analysis.yml This is an example of preset-exome-analysis.yml file ## Exomiser Analysis Template. # These are all the possible options for running exomiser. Use this as a template for # your own set-up. --- analysisMode : PASS_ONLY inheritanceModes : { AUTOSOMAL_DOMINANT : 0.1 , AUTOSOMAL_RECESSIVE_HOM_ALT : 0.1 , AUTOSOMAL_RECESSIVE_COMP_HET : 2.0 , X_DOMINANT : 0.1 , X_RECESSIVE_HOM_ALT : 0.1 , X_RECESSIVE_COMP_HET : 2.0 , MITOCHONDRIAL : 0.2 } frequencySources : [ THOUSAND_GENOMES , TOPMED , UK10K , ESP_AFRICAN_AMERICAN , ESP_EUROPEAN_AMERICAN , ESP_ALL , EXAC_AFRICAN_INC_AFRICAN_AMERICAN , EXAC_AMERICAN , EXAC_SOUTH_ASIAN , EXAC_EAST_ASIAN , EXAC_FINNISH , EXAC_NON_FINNISH_EUROPEAN , EXAC_OTHER , GNOMAD_E_AFR , GNOMAD_E_AMR , # GNOMAD_E_ASJ, GNOMAD_E_EAS , GNOMAD_E_FIN , GNOMAD_E_NFE , GNOMAD_E_OTH , GNOMAD_E_SAS , GNOMAD_G_AFR , GNOMAD_G_AMR , # GNOMAD_G_ASJ, GNOMAD_G_EAS , GNOMAD_G_FIN , GNOMAD_G_NFE , GNOMAD_G_OTH , GNOMAD_G_SAS ] # Possible pathogenicitySources: (POLYPHEN, MUTATION_TASTER, SIFT), (REVEL, MVP), CADD, REMM # REMM is trained on non-coding regulatory regions # *WARNING* if you enable CADD or REMM ensure that you have downloaded and installed the CADD/REMM tabix files # and updated their location in the application.properties. Exomiser will not run without this. pathogenicitySources : [ REVEL , MVP ] #this is the standard exomiser order. 
steps : [ failedVariantFilter : { }, variantEffectFilter : { remove : [ FIVE_PRIME_UTR_EXON_VARIANT , FIVE_PRIME_UTR_INTRON_VARIANT , THREE_PRIME_UTR_EXON_VARIANT , THREE_PRIME_UTR_INTRON_VARIANT , NON_CODING_TRANSCRIPT_EXON_VARIANT , NON_CODING_TRANSCRIPT_INTRON_VARIANT , CODING_TRANSCRIPT_INTRON_VARIANT , UPSTREAM_GENE_VARIANT , DOWNSTREAM_GENE_VARIANT , INTERGENIC_VARIANT , REGULATORY_REGION_VARIANT ] }, frequencyFilter : { maxFrequency : 2.0 }, pathogenicityFilter : { keepNonPathogenic : true }, inheritanceFilter : { }, omimPrioritiser : { }, hiPhivePrioritiser : { } ]","title":"8. Preset Exome Analysis File"},{"location":"exomiser_pipeline/#9-pheval-run","text":"make pheval run","title":"9. PhEval Run"},{"location":"pipeline/","text":"PhEval Pipeline 1. Clone PhEval git clone https://github.com/monarch-initiative/pheval.git 2. Installing PhEval dependencies Enter in the cloned folder and enter the following commands: poetry shell poetry install 3. Generate custom Makefile You must have Jinja2 installed, if you don't follow the steps here In resources folder are the following files responsible for makefile generation: \ud83d\udce6resources \u2523 \ud83d\udcdcMakefile.j2 \u2523 \ud83d\udcdccustom.Makefile \u2523 \ud83d\udcdcgeneratemakefile.sh \u2517 \ud83d\udcdcpheval-config.yaml You must edit the pheval-config.yaml file setting the directory where you extracted exomiser and phenotype data. An example could be found here . 
After setting the pheval-config.yaml file flowchart TD inputs[\"prepare-inputs\"] sr1[\"Setting up Runners\"] corpora[\"prepare-corpora\"] scrambling[\"Scrambling Process\"] r1[\"run\"] inputs === sr1 sr1 === corpora corpora === scrambling scrambling === r1 Data Flow flowchart LR vcf[(\"Phenopackets Original Data\")] pheno[(\"Scrambled Phenopackets\")] result[\"Phenotype Result\"] vcf -- prepare-corpora --> pheno pheno -- scramble factor e.g 0.5 --> result Jinja Template PhEval Makefile Generator Requirements To generate a PhEval Makefile we use the Jinja template engine. Installing Jinja Template Linux (Ubuntu): sudo snap install j2 Mac OS: PhEval Makefile Template (.j2 file) \ud83d\udce6resources \u2523 \ud83d\udcdc Makefile.j2 custom.Makefile is the template that will be generated on the fly based on the pheval-config.yaml . Each of these configurations is filled using a syntax like this: {{ config.tool }} . The value between the curly brackets is replaced by the corresponding configuration in the configuration file. PhEval custom.Makefile \ud83d\udce6resources \u2523 \ud83d\udcdc custom.Makefile PhEval generatemakefile.sh \ud83d\udce6resources \u2523 \ud83d\udcdcgeneratemakefile.sh generatemakefile.sh is only a shortcut for Makefile rendering using the configuration file e.g. bash ./resources/generatemakefile.sh PhEval Configuration File In resources folder, there is a file named pheval-config.yaml , this file is responsible for storing the PhEval Makefile generation. 
\ud83d\udce6resources \u2517 \ud83d\udcdcpheval-config.yaml Directories Section directories : tmp : data/tmp h2jar : ./h2-1.4.199.jar phen2gene : ./Phen2Gene exomiser : /home/data/exomiser/exomiser-cli-13.2.0-distribution/exomiser-cli-13.2.0 phenotype : /home/data/phenotype workspace : /tmp/pheval Configs Section configs : - tool : phen2gene version : 1.2.3 configuration : default - tool : exomiser version : 13.2.0 configuration : default exomiser_db : semsim1 This section is responsible for setting up the configuration folder. All software declared in the configs section will be linked in this folder. In the configuration above, for example, we have one configuration for phen2gene and one for exomiser. In the Directories Section , these two configurations must have one corresponding property set up. PhEval pipeline invokes the prepare-inputs goal, and in the preceding example, a configuration folder structure will be built that looks like this: \ud83d\udce6configurations \u2523 \ud83d\udcc2exomiser-13.2.0-default \u2517 \ud83d\udcc2phen2gene-1.2.3-default Each of these folders is a symbolic link that points to the corresponding software folder indicated in the Directories Section Corpora Section corpora : - id : lirical scrambled : - factor : 0.5 - factor : 0.7 custom_variants : - id : no_phenotype - id : phen2gene scrambled : - factor : 0.2 - factor : 0.9 custom_variants : - id : no_phenotype In this corpora section we can set up different experiments for corpus scrambling. Currently, PhEval provides corpora data from lirical, phen2gene, small_test and structural_variants \ud83d\udce6corpora \u2523 \ud83d\udcc2lirical \u2523 \ud83d\udcc2phen2gene \u2523 \ud83d\udcc2small_test \u2517 \ud83d\udcc2structural_variants The scramble property defines the magnitude of the scrambling factor during Phenopackets and VCF variants spiking process. 
Using the configuration in the example above, a corpora structure will be created like this: \ud83d\udce6corpora \u2523 \ud83d\udcc2lirical \u2503 \u2517 \ud83d\udcc2default \u2503 \u2517 \ud83d\udcc2scrambled-0.5 \u2503 \u2517 \ud83d\udcc2scrambled-0.7 \u2523 \ud83d\udcc2phen2gene \u2503 \u2517 \ud83d\udcc2default \u2503 \u2517 \ud83d\udcc2scrambled-0.2 \u2503 \u2517 \ud83d\udcc2scrambled-0.9 Runs Section runs : - tool : exomiser configuration : default corpus : lirical corpusvariant : scrambled-0.5 version : 13.2.0 - tool : phen2gene configuration : default corpus : phen2gene corpusvariant : scrambled-0.2 version : 1.2.3 Phen2Gen Specific Configuration The input directory config.yaml should be formatted like the example below and must be placed in phen2gene: /pathtoPhen2Gene/Phen2Gene declared in pheval-config.yaml file. tool : phen2gene tool_version : 1.2.3 phenotype_only : True tool_specific_configuration_options : environment : local phen2gene_python_executable : phen2gene.py post_process : score_order : descending Makefile Goals make pheval this runs the entire pipeline including corpus preparation and pheval run $(MAKE) prepare-inputs $(MAKE) prepare-corpora $(MAKE) pheval-run make semsim generate all configured similarity profiles make semsim-shuffle generate new ontology terms to the semsim process make semsim-scramble scramble semsim profile make semsim-convert convert all semsim profiles into exomiser SQL format make semsim-ingest takes all the configured semsim profiles and loads them into the exomiser databases","title":"PhEval Pipeline"},{"location":"pipeline/#pheval-pipeline","text":"","title":"PhEval Pipeline"},{"location":"pipeline/#1-clone-pheval","text":"git clone https://github.com/monarch-initiative/pheval.git","title":"1. Clone PhEval"},{"location":"pipeline/#2-installing-pheval-dependencies","text":"Enter in the cloned folder and enter the following commands: poetry shell poetry install","title":"2. 
Installing PhEval dependencies"},{"location":"pipeline/#3-generate-custom-makefile","text":"You must have Jinja2 installed, if you don't, follow the steps here In resources folder are the following files responsible for makefile generation: \ud83d\udce6resources \u2523 \ud83d\udcdcMakefile.j2 \u2523 \ud83d\udcdccustom.Makefile \u2523 \ud83d\udcdcgeneratemakefile.sh \u2517 \ud83d\udcdcpheval-config.yaml You must edit the pheval-config.yaml file setting the directory where you extracted exomiser and phenotype data. An example could be found here . After setting the pheval-config.yaml file flowchart TD inputs[\"prepare-inputs\"] sr1[\"Setting up Runners\"] corpora[\"prepare-corpora\"] scrambling[\"Scrambling Process\"] r1[\"run\"] inputs === sr1 sr1 === corpora corpora === scrambling scrambling === r1","title":"3. Generate custom Makefile"},{"location":"pipeline/#data-flow","text":"flowchart LR vcf[(\"Phenopackets Original Data\")] pheno[(\"Scrambled Phenopackets\")] result[\"Phenotype Result\"] vcf -- prepare-corpora --> pheno pheno -- scramble factor e.g 0.5 --> result","title":"Data Flow"},{"location":"pipeline/#jinja-template-pheval-makefile-generator-requirements","text":"To generate a PhEval Makefile we use the Jinja template engine.","title":"Jinja Template PhEval Makefile Generator Requirements"},{"location":"pipeline/#installing-jinja-template","text":"Linux (Ubuntu): sudo snap install j2 Mac OS:","title":"Installing Jinja Template"},{"location":"pipeline/#pheval-makefile-template-j2-file","text":"\ud83d\udce6resources \u2523 \ud83d\udcdc Makefile.j2 custom.Makefile is the template that will be generated on the fly based on the pheval-config.yaml . Each of these configurations is filled using a syntax like this: {{ config.tool }} . 
The value between the curly brackets is replaced by the corresponding configuration in the configuration file.","title":"PhEval Makefile Template (.j2 file)"},{"location":"pipeline/#pheval-custommakefile","text":"\ud83d\udce6resources \u2523 \ud83d\udcdc custom.Makefile","title":"PhEval custom.Makefile"},{"location":"pipeline/#pheval-generatemakefilesh","text":"\ud83d\udce6resources \u2523 \ud83d\udcdcgeneratemakefile.sh generatemakefile.sh is only a shortcut for Makefile rendering using the configuration file e.g. bash ./resources/generatemakefile.sh","title":"PhEval generatemakefile.sh"},{"location":"pipeline/#pheval-configuration-file","text":"In resources folder, there is a file named pheval-config.yaml , this file is responsible for storing the PhEval Makefile generation. \ud83d\udce6resources \u2517 \ud83d\udcdcpheval-config.yaml","title":"PhEval Configuration File"},{"location":"pipeline/#directories-section","text":"directories : tmp : data/tmp h2jar : ./h2-1.4.199.jar phen2gene : ./Phen2Gene exomiser : /home/data/exomiser/exomiser-cli-13.2.0-distribution/exomiser-cli-13.2.0 phenotype : /home/data/phenotype workspace : /tmp/pheval","title":"Directories Section"},{"location":"pipeline/#configs-section","text":"configs : - tool : phen2gene version : 1.2.3 configuration : default - tool : exomiser version : 13.2.0 configuration : default exomiser_db : semsim1 This section is responsible for setting up the configuration folder. All software declared in the configs section will be linked in this folder. In the configuration above, for example, we have one configuration for phen2gene and one for exomiser. In the Directories Section , these two configurations must have one corresponding property set up. 
PhEval pipeline invokes the prepare-inputs goal, and in the preceding example, a configuration folder structure will be built that looks like this: \ud83d\udce6configurations \u2523 \ud83d\udcc2exomiser-13.2.0-default \u2517 \ud83d\udcc2phen2gene-1.2.3-default Each of these folders is a symbolic link that points to the corresponding software folder indicated in the Directories Section","title":"Configs Section"},{"location":"pipeline/#corpora-section","text":"corpora : - id : lirical scrambled : - factor : 0.5 - factor : 0.7 custom_variants : - id : no_phenotype - id : phen2gene scrambled : - factor : 0.2 - factor : 0.9 custom_variants : - id : no_phenotype In this corpora section we can set up different experiments for corpus scrambling. Currently, PhEval provides corpora data from lirical, phen2gene, small_test and structural_variants \ud83d\udce6corpora \u2523 \ud83d\udcc2lirical \u2523 \ud83d\udcc2phen2gene \u2523 \ud83d\udcc2small_test \u2517 \ud83d\udcc2structural_variants The scramble property defines the magnitude of the scrambling factor during Phenopackets and VCF variants spiking process. 
Using the configuration in the example above, a corpora structure will be created like this: \ud83d\udce6corpora \u2523 \ud83d\udcc2lirical \u2503 \u2517 \ud83d\udcc2default \u2503 \u2517 \ud83d\udcc2scrambled-0.5 \u2503 \u2517 \ud83d\udcc2scrambled-0.7 \u2523 \ud83d\udcc2phen2gene \u2503 \u2517 \ud83d\udcc2default \u2503 \u2517 \ud83d\udcc2scrambled-0.2 \u2503 \u2517 \ud83d\udcc2scrambled-0.9","title":"Corpora Section"},{"location":"pipeline/#runs-section","text":"runs : - tool : exomiser configuration : default corpus : lirical corpusvariant : scrambled-0.5 version : 13.2.0 - tool : phen2gene configuration : default corpus : phen2gene corpusvariant : scrambled-0.2 version : 1.2.3","title":"Runs Section"},{"location":"pipeline/#phen2gen-specific-configuration","text":"The input directory config.yaml should be formatted like the example below and must be placed in phen2gene: /pathtoPhen2Gene/Phen2Gene declared in pheval-config.yaml file. tool : phen2gene tool_version : 1.2.3 phenotype_only : True tool_specific_configuration_options : environment : local phen2gene_python_executable : phen2gene.py post_process : score_order : descending","title":"Phen2Gen Specific Configuration"},{"location":"pipeline/#makefile-goals","text":"","title":"Makefile Goals"},{"location":"pipeline/#make-pheval","text":"this runs the entire pipeline including corpus preparation and pheval run $(MAKE) prepare-inputs $(MAKE) prepare-corpora $(MAKE) pheval-run","title":"make pheval"},{"location":"pipeline/#make-semsim","text":"generate all configured similarity profiles","title":"make semsim"},{"location":"pipeline/#make-semsim-shuffle","text":"generate new ontology terms to the semsim process","title":"make semsim-shuffle"},{"location":"pipeline/#make-semsim-scramble","text":"scramble semsim profile","title":"make semsim-scramble"},{"location":"pipeline/#make-semsim-convert","text":"convert all semsim profiles into exomiser SQL format","title":"make 
semsim-convert"},{"location":"pipeline/#make-semsim-ingest","text":"takes all the configured semsim profiles and loads them into the exomiser databases","title":"make semsim-ingest"},{"location":"plugins/","text":"All implemented PhEval runners are listed below, along with links to the original tools: Tool PhEval plugin Comment Exomiser Exomiser runner The link to the original tool can be found here Phen2Gene Phen2Gene runner The link to the original tool can be found here LIRICAL LIRICAL runner The link to the original tool can be found here SvAnna SvAnna runner The link to the original tool can be found here GADO GADO runner The link to the original tool can be found here Template Template runner OntoGPT OntoGPT runner ELDER ELDER runner MALCO MALCO runner AI MARRVEL AI MARRVEL runner The link to the original tool can be found here OAK OAK runner","title":"Plugins"},{"location":"roadmap/","text":"Roadmap The Roadmap is a rough plan, changes are expected throughout the year. 2023 Q1 Finalising the PhEval architecture (draft is done) End-to-end pipeline for testing PhEval with Exomiser and two versions of HPO Submitting a poster to Biocuration which outlines the full vision Q2 Focus on an analytic framework around PhEval, focusing on studying how changes to ontologies affect changes in variant prioritisation Extend phenotype pipeline to enable base releases and alternative patterns Q3 Improving the analytic framework of PhEval, especially phenotype analysis All intermediate files of pipeline have a corresponding LinkML model Focus on studying the effect of KG snippets (p2ds) on VP performance Q4 Drafting a PhEval paper Building standalone pipeline that reports changes in algorithm behaviours to ontology developers.","title":"Roadmap"},{"location":"roadmap/#roadmap","text":"The Roadmap is a rough plan, changes are expected throughout the 
year.","title":"Roadmap"},{"location":"roadmap/#2023","text":"","title":"2023"},{"location":"roadmap/#q1","text":"Finalising the PhEval architecture (draft is done) End-to-end pipeline for testing PhEval with Exomiser and two versions of HPO Submitting a poster to Biocuration which outlines the full vision","title":"Q1"},{"location":"roadmap/#q2","text":"Focus on an analytic framework around PhEval, focusing on studying how changes to ontologies affect changes in variant prioritisation Extend phenotype pipeline to enable base releases and alternative patterns","title":"Q2"},{"location":"roadmap/#q3","text":"Improving the analytic framework of PhEval, especially phenotype analysis All intermediate files of pipeline have a corresponding LinkML model Focus on studying the effect of KG snippets (p2ds) on VP performance","title":"Q3"},{"location":"roadmap/#q4","text":"Drafting a PhEval paper Building standalone pipeline that reports changes in algorithm behaviours to ontology developers.","title":"Q4"},{"location":"styleguide/","text":"Monarch Style Guide for PhEval No code in CLI methods","title":"Monarch Style Guide for PhEval"},{"location":"styleguide/#monarch-style-guide-for-pheval","text":"No code in CLI methods","title":"Monarch Style Guide for PhEval"},{"location":"api/pheval/cli/","text":"main main CLI method for PhEval Args: verbose (int, optional): Verbose flag. quiet (bool, optional): Quiet flag. Usage: main [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default -v , --verbose integer range ( 0 and above) N/A 0 -q , --quiet text N/A None --help boolean Show this message and exit. False pheval pheval Usage: pheval [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default --help boolean Show this message and exit. 
False Subcommands run : PhEval Runner Command Line Interface run PhEval Runner Command Line Interface Args: input_dir (Path): The input directory (relative path: e.g exomiser-13.11) testdata_dir (Path): The input directory (relative path: e.g ./data) runner (str): Runner implementation (e.g exomiser-13.11) tmp_dir (Path): The path of the temporary directory (optional) output_dir (Path): The path of the output directory config (Path): The path of the configuration file (optional e.g., config.yaml) version (str): The version of the tool implementation Usage: pheval run [OPTIONS] Options: Name Type Description Default --input-dir , -i Path The input directory (relative path: e.g exomiser-13.11) _required --testdata-dir , -t Path The input directory (relative path: e.g ./data) _required --runner , -r text Runner implementation (e.g exomiser-13.11) _required --tmp-dir , -m Path The path of the temporary directory (optional) None --output-dir , -o Path The path of the output directory _required --config , -c Path The path of the configuration file (optional e.g config.yaml) None --version , -v text Version of the tool implementation. None --help boolean Show this message and exit. False pheval-utils pheval_utils Usage: pheval-utils [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default --help boolean Show this message and exit. False Subcommands benchmark : Benchmark the gene/variant/disease prioritisation performance for a single run. benchmark-comparison : Benchmark the gene/variant/disease prioritisation performance for two runs. create-spiked-vcfs : generate-stats-plot : Generate bar plot from benchmark stats summary tsv. prepare-corpus : scramble-phenopackets : Generate noisy phenopackets from existing ones. semsim-scramble : Scrambles semsim profile multiplying score value by scramble factor semsim-to-exomiserdb : ingests semsim file into exomiser phenotypic database update-phenopackets : Update gene symbols and identifiers for phenopackets. 
benchmark Benchmark the gene/variant/disease prioritisation performance for a single run. Usage: pheval-utils benchmark [OPTIONS] Options: Name Type Description Default --directory , -d Path General results directory to be benchmarked, assumes contains subdirectories of pheval_gene_results/,pheval_variant_results/ or pheval_disease_results/. _required --phenopacket-dir , -p Path Full path to directory containing input phenopackets. _required --output-prefix , -o text Output file prefix. _required --score-order , -so choice ( ascending | descending ) Ordering of results for ranking. descending --threshold , -t float Score threshold. 0.0 --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_stacked --help boolean Show this message and exit. False benchmark-comparison Benchmark the gene/variant/disease prioritisation performance for two runs. Usage: pheval-utils benchmark-comparison [OPTIONS] Options: Name Type Description Default --run-data , -r Path Path to .txt file containing testdata phenopacket directory and corresponding results directory separated by a tab. Each run is listed on a new line, with the input testdata directory first, followed on the same line by a tab and then the results directory. _required --output-prefix , -o text Output file prefix. _required --score-order , -so choice ( ascending | descending ) Ordering of results for ranking. descending --threshold , -t float Score threshold. 
0.0 --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_cumulative --help boolean Show this message and exit. False create-spiked-vcfs Create spiked VCF from either a Phenopacket or a Phenopacket directory. Args: phenopacket_path (Path): Path to a single Phenopacket file (optional). phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional). output_dir (Path): The directory to store the generated spiked VCF file(s). hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional). Usage: pheval-utils create-spiked-vcfs [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopacket directory for updating. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --hg19-template-vcf , -hg19 Path Template hg19 VCF file NOTE: This argument is mutually exclusive with arguments: [hg19_vcf_dir]. None --hg38-template-vcf , -hg38 Path Template hg38 VCF file NOTE: This argument is mutually exclusive with arguments: [hg38_vcf_dir]. None --hg19-vcf-dir , -hg19-dir Path Path to directory containing hg19 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg19_template_vcf]. 
None --hg38-vcf-dir , -hg38-dir Path Path to directory containing hg38 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg38_template_vcf]. None --output-dir , -O Path Path for creation of output directory vcf --help boolean Show this message and exit. False generate-stats-plot Generate bar plot from benchmark stats summary tsv. Usage: pheval-utils generate-stats-plot [OPTIONS] Options: Name Type Description Default --benchmarking-tsv , -b Path Path to benchmark summary tsv output by PhEval benchmark commands. _required --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation NOTE: This argument is mutually exclusive with arguments: [disease_analysis, variant_analysis]. False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation NOTE: This argument is mutually exclusive with arguments: [gene_analysis, disease_analysis]. False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation NOTE: This argument is mutually exclusive with arguments: [gene_analysis, variant_analysis]. False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_cumulative --title , -t text Title for plot, specify the title on the CLI enclosed with \"\" None --help boolean Show this message and exit. False prepare-corpus Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Args: phenopacket_dir (Path): The path to the directory containing Phenopackets. variant_analysis (bool): If True, check for complete variant records in the Phenopackets. gene_analysis (bool): If True, check for complete gene records in the Phenopackets. disease_analysis (bool): If True, check for complete disease records in the Phenopackets. gene_identifier (str): Identifier for updating gene identifiers, if applicable. 
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional). output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files. Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. Usage: pheval-utils prepare-corpus [OPTIONS] Options: Name Type Description Default --phenopacket-dir , -p Path Path to phenopacket corpus directory.. _required --variant-analysis / --no-variant-analysis boolean Specify whether to check for complete variant records in the phenopackets. False --gene-analysis / --no-gene-analysis boolean Specify whether to check for complete gene records in the phenopackets. False --disease-analysis / --no-disease-analysis boolean Specify whether to check for complete disease records in the phenopackets. False --gene-identifier , -g choice ( ensembl_id | entrez_id | hgnc_id ) Gene identifier to update in phenopacket None --hg19-template-vcf , -hg19 Path Template hg19 VCF file NOTE: This argument is mutually exclusive with arguments: [hg19_vcf_dir]. None --hg38-template-vcf , -hg38 Path Template hg38 VCF file NOTE: This argument is mutually exclusive with arguments: [hg38_vcf_dir]. None --hg19-vcf-dir , -hg19-dir Path Path to directory containing hg19 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg19_template_vcf]. None --hg38-vcf-dir , -hg38-dir Path Path to directory containing hg38 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg38_template_vcf]. None --output-dir , -o Path Path to output prepared corpus. prepared_corpus --help boolean Show this message and exit. 
False scramble-phenopackets Generate noisy phenopackets from existing ones. Usage: pheval-utils scramble-phenopackets [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopackets directory. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --scramble-factor , -s float Scramble factor for randomising phenopacket phenotypic profiles. 0.5 --output-dir , -O Path Path for creation of output directory noisy_phenopackets --help boolean Show this message and exit. False semsim-scramble Scrambles semsim profile multiplying score value by scramble factor Args: input (Path): Path file that points out to the semsim profile output (Path): Path file that points out to the output file score_column (List[str]): Score column(s) that will be scrambled scramble_factor (float): Scramble Magnitude Usage: pheval-utils semsim-scramble [OPTIONS] Options: Name Type Description Default --input , -i Path Path to the semantic similarity profile to be scrambled. _required --output , -o Path Path where the scrambled semsim file will be written. _required --score-column , -c choice ( jaccard_similarity | dice_similarity | phenodigm_score ) Score column that will be scrambled _required --scramble-factor , -s float Scramble Magnitude (noise) that will be applied to semantic similarity score column (e.g. jaccard similarity). 0.5 --help boolean Show this message and exit. False semsim-to-exomiserdb ingests semsim file into exomiser phenotypic database Args: input_file (Path): semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv object_prefix (str): object prefix. e.g. MP subject_prefix (str): subject prefix e.g HP db_path (Path): Exomiser Phenotypic Database Folder Path. (e.g. 
/exomiser_folder/2209_phenotype/2209_phenotype/) Usage: pheval-utils semsim-to-exomiserdb [OPTIONS] Options: Name Type Description Default --input-file , -i Path Semsim input file. _required --object-prefix text Object Prefix. e.g. MP _required --subject-prefix text Subject Prefix. e.g. HP _required --db-path , -d Path Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/). This is the path where the phenotypic database folder will be written out. _required --help boolean Show this message and exit. False update-phenopackets Update gene symbols and identifiers for phenopackets. Usage: pheval-utils update-phenopackets [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopacket directory for updating. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --output-dir , -o Path Path to write phenopacket. _required --gene-identifier , -g choice ( ensembl_id | entrez_id | hgnc_id ) Gene identifier to add to phenopacket ensembl_id --help boolean Show this message and exit. False","title":"Cli"},{"location":"api/pheval/cli/#main","text":"main CLI method for PhEval Args: verbose (int, optional): Verbose flag. quiet (bool, optional): Quiet flag. Usage: main [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default -v , --verbose integer range ( 0 and above) N/A 0 -q , --quiet text N/A None --help boolean Show this message and exit. False","title":"main"},{"location":"api/pheval/cli/#pheval","text":"pheval Usage: pheval [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default --help boolean Show this message and exit. 
False Subcommands run : PhEval Runner Command Line Interface","title":"pheval"},{"location":"api/pheval/cli/#run","text":"PhEval Runner Command Line Interface Args: input_dir (Path): The input directory (relative path: e.g exomiser-13.11) testdata_dir (Path): The input directory (relative path: e.g ./data) runner (str): Runner implementation (e.g exomiser-13.11) tmp_dir (Path): The path of the temporary directory (optional) output_dir (Path): The path of the output directory config (Path): The path of the configuration file (optional e.g., config.yaml) version (str): The version of the tool implementation Usage: pheval run [OPTIONS] Options: Name Type Description Default --input-dir , -i Path The input directory (relative path: e.g exomiser-13.11) _required --testdata-dir , -t Path The input directory (relative path: e.g ./data) _required --runner , -r text Runner implementation (e.g exomiser-13.11) _required --tmp-dir , -m Path The path of the temporary directory (optional) None --output-dir , -o Path The path of the output directory _required --config , -c Path The path of the configuration file (optional e.g config.yaml) None --version , -v text Version of the tool implementation. None --help boolean Show this message and exit. False","title":"run"},{"location":"api/pheval/cli/#pheval-utils","text":"pheval_utils Usage: pheval-utils [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default --help boolean Show this message and exit. False Subcommands benchmark : Benchmark the gene/variant/disease prioritisation performance for a single run. benchmark-comparison : Benchmark the gene/variant/disease prioritisation performance for two runs. create-spiked-vcfs : generate-stats-plot : Generate bar plot from benchmark stats summary tsv. prepare-corpus : scramble-phenopackets : Generate noisy phenopackets from existing ones. 
semsim-scramble : Scrambles semsim profile multiplying score value by scramble factor semsim-to-exomiserdb : ingests semsim file into exomiser phenotypic database update-phenopackets : Update gene symbols and identifiers for phenopackets.","title":"pheval-utils"},{"location":"api/pheval/cli/#benchmark","text":"Benchmark the gene/variant/disease prioritisation performance for a single run. Usage: pheval-utils benchmark [OPTIONS] Options: Name Type Description Default --directory , -d Path General results directory to be benchmarked, assumes contains subdirectories of pheval_gene_results/,pheval_variant_results/ or pheval_disease_results/. _required --phenopacket-dir , -p Path Full path to directory containing input phenopackets. _required --output-prefix , -o text Output file prefix. _required --score-order , -so choice ( ascending | descending ) Ordering of results for ranking. descending --threshold , -t float Score threshold. 0.0 --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_stacked --help boolean Show this message and exit. False","title":"benchmark"},{"location":"api/pheval/cli/#benchmark-comparison","text":"Benchmark the gene/variant/disease prioritisation performance for two runs. Usage: pheval-utils benchmark-comparison [OPTIONS] Options: Name Type Description Default --run-data , -r Path Path to .txt file containing testdata phenopacket directory and corresponding results directory separated by tab. Each run is contained on a new line, with the input testdata listed first and, on the same line separated by a tab, the results directory. _required --output-prefix , -o text Output file prefix. 
_required --score-order , -so choice ( ascending | descending ) Ordering of results for ranking. descending --threshold , -t float Score threshold. 0.0 --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_cumulative --help boolean Show this message and exit. False","title":"benchmark-comparison"},{"location":"api/pheval/cli/#create-spiked-vcfs","text":"Create spiked VCF from either a Phenopacket or a Phenopacket directory. Args: phenopacket_path (Path): Path to a single Phenopacket file (optional). phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional). output_dir (Path): The directory to store the generated spiked VCF file(s). hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional). Usage: pheval-utils create-spiked-vcfs [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopacket directory for updating. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --hg19-template-vcf , -hg19 Path Template hg19 VCF file NOTE: This argument is mutually exclusive with arguments: [hg19_vcf_dir]. None --hg38-template-vcf , -hg38 Path Template hg38 VCF file NOTE: This argument is mutually exclusive with arguments: [hg38_vcf_dir]. 
None --hg19-vcf-dir , -hg19-dir Path Path to directory containing hg19 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg19_template_vcf]. None --hg38-vcf-dir , -hg38-dir Path Path to directory containing hg38 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg38_template_vcf]. None --output-dir , -O Path Path for creation of output directory vcf --help boolean Show this message and exit. False","title":"create-spiked-vcfs"},{"location":"api/pheval/cli/#generate-stats-plot","text":"Generate bar plot from benchmark stats summary tsv. Usage: pheval-utils generate-stats-plot [OPTIONS] Options: Name Type Description Default --benchmarking-tsv , -b Path Path to benchmark summary tsv output by PhEval benchmark commands. _required --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation NOTE: This argument is mutually exclusive with arguments: [disease_analysis, variant_analysis]. False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation NOTE: This argument is mutually exclusive with arguments: [gene_analysis, disease_analysis]. False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation NOTE: This argument is mutually exclusive with arguments: [gene_analysis, variant_analysis]. False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_cumulative --title , -t text Title for plot, specify the title on the CLI enclosed with \"\" None --help boolean Show this message and exit. False","title":"generate-stats-plot"},{"location":"api/pheval/cli/#prepare-corpus","text":"Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Args: phenopacket_dir (Path): The path to the directory containing Phenopackets. variant_analysis (bool): If True, check for complete variant records in the Phenopackets. 
gene_analysis (bool): If True, check for complete gene records in the Phenopackets. disease_analysis (bool): If True, check for complete disease records in the Phenopackets. gene_identifier (str): Identifier for updating gene identifiers, if applicable. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional). output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files. Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. Usage: pheval-utils prepare-corpus [OPTIONS] Options: Name Type Description Default --phenopacket-dir , -p Path Path to phenopacket corpus directory.. _required --variant-analysis / --no-variant-analysis boolean Specify whether to check for complete variant records in the phenopackets. False --gene-analysis / --no-gene-analysis boolean Specify whether to check for complete gene records in the phenopackets. False --disease-analysis / --no-disease-analysis boolean Specify whether to check for complete disease records in the phenopackets. False --gene-identifier , -g choice ( ensembl_id | entrez_id | hgnc_id ) Gene identifier to update in phenopacket None --hg19-template-vcf , -hg19 Path Template hg19 VCF file NOTE: This argument is mutually exclusive with arguments: [hg19_vcf_dir]. None --hg38-template-vcf , -hg38 Path Template hg38 VCF file NOTE: This argument is mutually exclusive with arguments: [hg38_vcf_dir]. None --hg19-vcf-dir , -hg19-dir Path Path to directory containing hg19 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg19_template_vcf]. None --hg38-vcf-dir , -hg38-dir Path Path to directory containing hg38 VCF templates. 
NOTE: This argument is mutually exclusive with arguments: [hg38_template_vcf]. None --output-dir , -o Path Path to output prepared corpus. prepared_corpus --help boolean Show this message and exit. False","title":"prepare-corpus"},{"location":"api/pheval/cli/#scramble-phenopackets","text":"Generate noisy phenopackets from existing ones. Usage: pheval-utils scramble-phenopackets [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopackets directory. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --scramble-factor , -s float Scramble factor for randomising phenopacket phenotypic profiles. 0.5 --output-dir , -O Path Path for creation of output directory noisy_phenopackets --help boolean Show this message and exit. False","title":"scramble-phenopackets"},{"location":"api/pheval/cli/#semsim-scramble","text":"Scrambles semsim profile multiplying score value by scramble factor Args: input (Path): Path file that points out to the semsim profile output (Path): Path file that points out to the output file score_column (List[str]): Score column(s) that will be scrambled scramble_factor (float): Scramble Magnitude Usage: pheval-utils semsim-scramble [OPTIONS] Options: Name Type Description Default --input , -i Path Path to the semantic similarity profile to be scrambled. _required --output , -o Path Path where the scrambled semsim file will be written. _required --score-column , -c choice ( jaccard_similarity | dice_similarity | phenodigm_score ) Score column that will be scrambled _required --scramble-factor , -s float Scramble Magnitude (noise) that will be applied to semantic similarity score column (e.g. jaccard similarity). 0.5 --help boolean Show this message and exit. 
False","title":"semsim-scramble"},{"location":"api/pheval/cli/#semsim-to-exomiserdb","text":"ingests semsim file into exomiser phenotypic database Args: input_file (Path): semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv object_prefix (str): object prefix. e.g. MP subject_prefix (str): subject prefix e.g HP db_path (Path): Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/) Usage: pheval-utils semsim-to-exomiserdb [OPTIONS] Options: Name Type Description Default --input-file , -i Path Semsim input file. _required --object-prefix text Object Prefix. e.g. MP _required --subject-prefix text Subject Prefix. e.g. HP _required --db-path , -d Path Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/). This is the path where the phenotypic database folder will be written out. _required --help boolean Show this message and exit. False","title":"semsim-to-exomiserdb"},{"location":"api/pheval/cli/#update-phenopackets","text":"Update gene symbols and identifiers for phenopackets. Usage: pheval-utils update-phenopackets [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopacket directory for updating. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --output-dir , -o Path Path to write phenopacket. _required --gene-identifier , -g choice ( ensembl_id | entrez_id | hgnc_id ) Gene identifier to add to phenopacket ensembl_id --help boolean Show this message and exit. False","title":"update-phenopackets"},{"location":"api/pheval/config_parser/","text":"InputDirConfig dataclass Class for defining the fields within the input directory config. Parameters: Name Type Description Default tool str Name of the tool implementation (e.g. 
exomiser/phen2gene) required tool_version str Version of the tool implementation required variant_analysis bool Whether to extract prioritised variants from results. required gene_analysis bool Whether to extract prioritised genes from results. required disease_analysis bool Whether to extract prioritised diseases from results. required tool_specific_configuration_options Any Tool specific configurations required Source code in src/pheval/config_parser.py 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 @serde @dataclass class InputDirConfig : \"\"\" Class for defining the fields within the input directory config. Args: tool (str): Name of the tool implementation (e.g. exomiser/phen2gene) tool_version (str): Version of the tool implementation variant_analysis (bool): Whether to extract prioritised variants from results. gene_analysis (bool): Whether to extract prioritised genes from results. disease_analysis (bool): Whether to extract prioritised diseases from results. tool_specific_configuration_options (Any): Tool specific configurations \"\"\" tool : str tool_version : str variant_analysis : bool gene_analysis : bool disease_analysis : bool tool_specific_configuration_options : Any parse_input_dir_config ( input_dir ) Reads the config file. Source code in src/pheval/config_parser.py 35 36 37 38 39 40 def parse_input_dir_config ( input_dir : Path ) -> InputDirConfig : \"\"\"Reads the config file.\"\"\" with open ( Path ( input_dir ) . joinpath ( \"config.yaml\" ), \"r\" ) as config_file : config = yaml . safe_load ( config_file ) config_file . close () return from_yaml ( InputDirConfig , yaml . dump ( config ))","title":"Config parser"},{"location":"api/pheval/config_parser/#src.pheval.config_parser.InputDirConfig","text":"Class for defining the fields within the input directory config. Parameters: Name Type Description Default tool str Name of the tool implementation (e.g. 
exomiser/phen2gene) required tool_version str Version of the tool implementation required variant_analysis bool Whether to extract prioritised variants from results. required gene_analysis bool Whether to extract prioritised genes from results. required disease_analysis bool Whether to extract prioritised diseases from results. required tool_specific_configuration_options Any Tool specific configurations required Source code in src/pheval/config_parser.py 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 @serde @dataclass class InputDirConfig : \"\"\" Class for defining the fields within the input directory config. Args: tool (str): Name of the tool implementation (e.g. exomiser/phen2gene) tool_version (str): Version of the tool implementation variant_analysis (bool): Whether to extract prioritised variants from results. gene_analysis (bool): Whether to extract prioritised genes from results. disease_analysis (bool): Whether to extract prioritised diseases from results. tool_specific_configuration_options (Any): Tool specific configurations \"\"\" tool : str tool_version : str variant_analysis : bool gene_analysis : bool disease_analysis : bool tool_specific_configuration_options : Any","title":"InputDirConfig"},{"location":"api/pheval/config_parser/#src.pheval.config_parser.parse_input_dir_config","text":"Reads the config file. Source code in src/pheval/config_parser.py 35 36 37 38 39 40 def parse_input_dir_config ( input_dir : Path ) -> InputDirConfig : \"\"\"Reads the config file.\"\"\" with open ( Path ( input_dir ) . joinpath ( \"config.yaml\" ), \"r\" ) as config_file : config = yaml . safe_load ( config_file ) config_file . close () return from_yaml ( InputDirConfig , yaml . dump ( config ))","title":"parse_input_dir_config()"},{"location":"api/pheval/constants/","text":"","title":"Constants"},{"location":"api/pheval/run_metadata/","text":"BasicOutputRunMetaData dataclass Class for defining variables for the run metadata. 
Parameters: Name Type Description Default tool str Name of the tool implementation required tool_version str Version of the tool implementation required config Path Path to the config file located in the input directory required run_timestamp int Time taken for run to complete required corpus Path Path to corpus used in pheval run required tool_specific_configuration_options Any Special field that can be overwritten by tool implementations to contain any extra tool specific configurations used in the run None Source code in src/pheval/run_metadata.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 @serde @dataclass class BasicOutputRunMetaData : \"\"\"Class for defining variables for the run metadata. Args: tool (str): Name of the tool implementation tool_version (str): Version of the tool implementation config (Path): Path to the config file located in the input directory run_timestamp (int): Time taken for run to complete corpus (Path): Path to corpus used in pheval run tool_specific_configuration_options (Any): Special field that can be overwritten by tool implementations to contain any extra tool specific configurations used in the run \"\"\" tool : str tool_version : str config : Path run_timestamp : int corpus : Path tool_specific_configuration_options : Any = None","title":"Run metadata"},{"location":"api/pheval/run_metadata/#src.pheval.run_metadata.BasicOutputRunMetaData","text":"Class for defining variables for the run metadata. 
Parameters: Name Type Description Default tool str Name of the tool implementation required tool_version str Version of the tool implementation required config Path Path to the config file located in the input directory required run_timestamp int Time taken for run to complete required corpus Path Path to corpus used in pheval run required tool_specific_configuration_options Any Special field that can be overwritten by tool implementations to contain any extra tool specific configurations used in the run None Source code in src/pheval/run_metadata.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 @serde @dataclass class BasicOutputRunMetaData : \"\"\"Class for defining variables for the run metadata. Args: tool (str): Name of the tool implementation tool_version (str): Version of the tool implementation config (Path): Path to the config file located in the input directory run_timestamp (int): Time taken for run to complete corpus (Path): Path to corpus used in pheval run tool_specific_configuration_options (Any): Special field that can be overwritten by tool implementations to contain any extra tool specific configurations used in the run \"\"\" tool : str tool_version : str config : Path run_timestamp : int corpus : Path tool_specific_configuration_options : Any = None","title":"BasicOutputRunMetaData"},{"location":"api/pheval/analyse/analysis/","text":"benchmark_directory ( results_dir_and_input , score_order , output_prefix , threshold , gene_analysis , variant_analysis , disease_analysis , plot_type ) Benchmark prioritisation performance for a single run. Parameters: Name Type Description Default results_dir_and_input TrackInputOutputDirectories Input and output directories for tracking results. required score_order str The order in which scores are arranged, this can be either ascending or descending. required output_prefix str Prefix for the benchmark output file names. required threshold float The threshold for benchmark evaluation. 
required gene_analysis bool Boolean flag indicating whether to benchmark gene results. required variant_analysis bool Boolean flag indicating whether to benchmark variant results. required disease_analysis bool Boolean flag indicating whether to benchmark disease results. required plot_type str Type of plot for benchmark visualisation. required Source code in src/pheval/analyse/analysis.py 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 def benchmark_directory ( results_dir_and_input : TrackInputOutputDirectories , score_order : str , output_prefix : str , threshold : float , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , ) -> None : \"\"\" Benchmark prioritisation performance for a single run. Args: results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results. score_order (str): The order in which scores are arranged, this can be either ascending or descending. output_prefix (str): Prefix for the benchmark output file names. threshold (float): The threshold for benchmark evaluation. gene_analysis (bool): Boolean flag indicating whether to benchmark gene results. variant_analysis (bool): Boolean flag indicating whether to benchmark variant results. disease_analysis (bool): Boolean flag indicating whether to benchmark disease results. plot_type (str): Type of plot for benchmark visualisation. 
\"\"\" if gene_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = GeneBenchmarkRunOutputGenerator (), ) if variant_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = VariantBenchmarkRunOutputGenerator (), ) if disease_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = DiseaseBenchmarkRunOutputGenerator (), ) benchmark_run_comparisons ( results_directories , score_order , output_prefix , threshold , gene_analysis , variant_analysis , disease_analysis , plot_type ) Benchmark prioritisation performance for several runs. Parameters: Name Type Description Default results_directories List [ TrackInputOutputDirectories ] Input and output directories for tracking results. required score_order str The order in which scores are arranged, this can be either ascending or descending. required output_prefix str Prefix for the benchmark output file names. required threshold float The threshold for benchmark evaluation. required gene_analysis bool Boolean flag indicating whether to benchmark gene results. required variant_analysis bool Boolean flag indicating whether to benchmark variant results. required disease_analysis bool Boolean flag indicating whether to benchmark disease results. required plot_type str Type of plot for benchmark visualisation. 
required Source code in src/pheval/analyse/analysis.py 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 def benchmark_run_comparisons ( results_directories : List [ TrackInputOutputDirectories ], score_order : str , output_prefix : str , threshold : float , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , ) -> None : \"\"\" Benchmark prioritisation performance for several runs. Args: results_directories (List[TrackInputOutputDirectories]): Input and output directories for tracking results. score_order (str): The order in which scores are arranged, this can be either ascending or descending. output_prefix (str): Prefix for the benchmark output file names. threshold (float): The threshold for benchmark evaluation. gene_analysis (bool): Boolean flag indicating whether to benchmark gene results. variant_analysis (bool): Boolean flag indicating whether to benchmark variant results. disease_analysis (bool): Boolean flag indicating whether to benchmark disease results. plot_type (str): Type of plot for benchmark visualisation. 
\"\"\" if gene_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = GeneBenchmarkRunOutputGenerator (), ) if variant_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = VariantBenchmarkRunOutputGenerator (), ) if disease_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = DiseaseBenchmarkRunOutputGenerator (), )","title":"Analysis"},{"location":"api/pheval/analyse/analysis/#src.pheval.analyse.analysis.benchmark_directory","text":"Benchmark prioritisation performance for a single run. Parameters: Name Type Description Default results_dir_and_input TrackInputOutputDirectories Input and output directories for tracking results. required score_order str The order in which scores are arranged, this can be either ascending or descending. required output_prefix str Prefix for the benchmark output file names. required threshold float The threshold for benchmark evaluation. required gene_analysis bool Boolean flag indicating whether to benchmark gene results. required variant_analysis bool Boolean flag indicating whether to benchmark variant results. required disease_analysis bool Boolean flag indicating whether to benchmark disease results. required plot_type str Type of plot for benchmark visualisation. 
required Source code in src/pheval/analyse/analysis.py 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 def benchmark_directory ( results_dir_and_input : TrackInputOutputDirectories , score_order : str , output_prefix : str , threshold : float , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , ) -> None : \"\"\" Benchmark prioritisation performance for a single run. Args: results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results. score_order (str): The order in which scores are arranged, this can be either ascending or descending. output_prefix (str): Prefix for the benchmark output file names. threshold (float): The threshold for benchmark evaluation. gene_analysis (bool): Boolean flag indicating whether to benchmark gene results. variant_analysis (bool): Boolean flag indicating whether to benchmark variant results. disease_analysis (bool): Boolean flag indicating whether to benchmark disease results. plot_type (str): Type of plot for benchmark visualisation. 
\"\"\" if gene_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = GeneBenchmarkRunOutputGenerator (), ) if variant_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = VariantBenchmarkRunOutputGenerator (), ) if disease_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = DiseaseBenchmarkRunOutputGenerator (), )","title":"benchmark_directory()"},{"location":"api/pheval/analyse/analysis/#src.pheval.analyse.analysis.benchmark_run_comparisons","text":"Benchmark prioritisation performance for several runs. Parameters: Name Type Description Default results_directories List [ TrackInputOutputDirectories ] Input and output directories for tracking results. required score_order str The order in which scores are arranged, this can be either ascending or descending. required output_prefix str Prefix for the benchmark output file names. required threshold float The threshold for benchmark evaluation. required gene_analysis bool Boolean flag indicating whether to benchmark gene results. required variant_analysis bool Boolean flag indicating whether to benchmark variant results. required disease_analysis bool Boolean flag indicating whether to benchmark disease results. required plot_type str Type of plot for benchmark visualisation. 
required Source code in src/pheval/analyse/analysis.py 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 def benchmark_run_comparisons ( results_directories : List [ TrackInputOutputDirectories ], score_order : str , output_prefix : str , threshold : float , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , ) -> None : \"\"\" Benchmark prioritisation performance for several runs. Args: results_directories (List[TrackInputOutputDirectories]): Input and output directories for tracking results. score_order (str): The order in which scores are arranged, this can be either ascending or descending. output_prefix (str): Prefix for the benchmark output file names. threshold (float): The threshold for benchmark evaluation. gene_analysis (bool): Boolean flag indicating whether to benchmark gene results. variant_analysis (bool): Boolean flag indicating whether to benchmark variant results. disease_analysis (bool): Boolean flag indicating whether to benchmark disease results. plot_type (str): Type of plot for benchmark visualisation. 
\"\"\" if gene_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = GeneBenchmarkRunOutputGenerator (), ) if variant_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = VariantBenchmarkRunOutputGenerator (), ) if disease_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = DiseaseBenchmarkRunOutputGenerator (), )","title":"benchmark_run_comparisons()"},{"location":"api/pheval/analyse/benchmark_generator/","text":"BenchmarkRunOutputGenerator dataclass Base class for recording data required for generating benchmarking outputs. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the prioritisation type output file. y_label str Label for the y-axis in benchmarking outputs. generate_benchmark_run_results Callable Callable to generate benchmark run results. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the rank comparison file. Source code in src/pheval/analyse/benchmark_generator.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 @dataclass class BenchmarkRunOutputGenerator : \"\"\"Base class for recording data required for generating benchmarking outputs. Attributes: prioritisation_type_file_prefix (str): Prefix for the prioritisation type output file. y_label (str): Label for the y-axis in benchmarking outputs. generate_benchmark_run_results (Callable): Callable to generate benchmark run results. 
Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the rank comparison file. \"\"\" prioritisation_type_file_prefix : str y_label : str generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] stats_comparison_file_suffix : str DiseaseBenchmarkRunOutputGenerator dataclass Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing disease prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for disease prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the disease prioritisation type file. Defaults to DISEASE_PLOT_FILE_PREFIX. y_label str Label for the y-axis in disease prioritisation benchmarking outputs. Defaults to DISEASE_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate disease prioritisation benchmark run results. Defaults to benchmark_disease_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the disease rank comparison file. Defaults to \"-disease_summary.tsv\". Source code in src/pheval/analyse/benchmark_generator.py 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 @dataclass class DiseaseBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing disease prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for disease prioritisation benchmarking. 
Attributes: prioritisation_type_file_prefix (str): Prefix for the disease prioritisation type file. Defaults to DISEASE_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in disease prioritisation benchmarking outputs. Defaults to DISEASE_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate disease prioritisation benchmark run results. Defaults to benchmark_disease_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the disease rank comparison file. Defaults to \"-disease_summary.tsv\". \"\"\" prioritisation_type_file_prefix : str = DISEASE_PLOT_FILE_PREFIX y_label : str = DISEASE_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_disease_prioritisation stats_comparison_file_suffix : str = \"-disease_summary.tsv\" GeneBenchmarkRunOutputGenerator dataclass Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing gene prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for gene prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the gene prioritisation type file. Defaults to GENE_PLOT_FILE_PREFIX. y_label str Label for the y-axis in gene prioritisation benchmarking outputs. Defaults to GENE_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate gene prioritisation benchmark run results. Defaults to benchmark_gene_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the gene rank comparison file. Defaults to \"-gene_summary.tsv\". 
Source code in src/pheval/analyse/benchmark_generator.py 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 @dataclass class GeneBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing gene prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for gene prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the gene prioritisation type file. Defaults to GENE_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in gene prioritisation benchmarking outputs. Defaults to GENE_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate gene prioritisation benchmark run results. Defaults to benchmark_gene_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the gene rank comparison file. Defaults to \"-gene_summary.tsv\". \"\"\" prioritisation_type_file_prefix : str = GENE_PLOT_FILE_PREFIX y_label : str = GENE_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_gene_prioritisation stats_comparison_file_suffix : str = \"-gene_summary.tsv\" VariantBenchmarkRunOutputGenerator dataclass Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing variant prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for variant prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the variant prioritisation type file. Defaults to VARIANT_PLOT_FILE_PREFIX. y_label str Label for the y-axis in variant prioritisation benchmarking outputs. 
Defaults to VARIANT_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate variant prioritisation benchmark run results. Defaults to benchmark_variant_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the variant rank comparison file. Defaults to \"-variant_summary.tsv\". Source code in src/pheval/analyse/benchmark_generator.py 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 @dataclass class VariantBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing variant prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for variant prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the variant prioritisation type file. Defaults to VARIANT_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in variant prioritisation benchmarking outputs. Defaults to VARIANT_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate variant prioritisation benchmark run results. Defaults to benchmark_variant_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the variant rank comparison file. Defaults to \"-variant_summary.tsv\". 
\"\"\" prioritisation_type_file_prefix : str = VARIANT_PLOT_FILE_PREFIX y_label : str = VARIANT_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_variant_prioritisation stats_comparison_file_suffix : str = \"-variant_summary.tsv\"","title":"Benchmark generator"},{"location":"api/pheval/analyse/benchmark_generator/#src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator","text":"Base class for recording data required for generating benchmarking outputs. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the prioritisation type output file. y_label str Label for the y-axis in benchmarking outputs. generate_benchmark_run_results Callable Callable to generate benchmark run results. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the rank comparison file. Source code in src/pheval/analyse/benchmark_generator.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 @dataclass class BenchmarkRunOutputGenerator : \"\"\"Base class for recording data required for generating benchmarking outputs. Attributes: prioritisation_type_file_prefix (str): Prefix for the prioritisation type output file. y_label (str): Label for the y-axis in benchmarking outputs. generate_benchmark_run_results (Callable): Callable to generate benchmark run results. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the rank comparison file. 
\"\"\" prioritisation_type_file_prefix : str y_label : str generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] stats_comparison_file_suffix : str","title":"BenchmarkRunOutputGenerator"},{"location":"api/pheval/analyse/benchmark_generator/#src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerator","text":"Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing disease prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for disease prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the disease prioritisation type file. Defaults to DISEASE_PLOT_FILE_PREFIX. y_label str Label for the y-axis in disease prioritisation benchmarking outputs. Defaults to DISEASE_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate disease prioritisation benchmark run results. Defaults to benchmark_disease_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the disease rank comparison file. Defaults to \"-disease_summary.tsv\". Source code in src/pheval/analyse/benchmark_generator.py 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 @dataclass class DiseaseBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing disease prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for disease prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the disease prioritisation type file. Defaults to DISEASE_PLOT_FILE_PREFIX. 
y_label (str): Label for the y-axis in disease prioritisation benchmarking outputs. Defaults to DISEASE_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate disease prioritisation benchmark run results. Defaults to benchmark_disease_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the disease rank comparison file. Defaults to \"-disease_summary.tsv\". \"\"\" prioritisation_type_file_prefix : str = DISEASE_PLOT_FILE_PREFIX y_label : str = DISEASE_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_disease_prioritisation stats_comparison_file_suffix : str = \"-disease_summary.tsv\"","title":"DiseaseBenchmarkRunOutputGenerator"},{"location":"api/pheval/analyse/benchmark_generator/#src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator","text":"Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing gene prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for gene prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the gene prioritisation type file. Defaults to GENE_PLOT_FILE_PREFIX. y_label str Label for the y-axis in gene prioritisation benchmarking outputs. Defaults to GENE_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate gene prioritisation benchmark run results. Defaults to benchmark_gene_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the gene rank comparison file. Defaults to \"-gene_summary.tsv\". 
Source code in src/pheval/analyse/benchmark_generator.py 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 @dataclass class GeneBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing gene prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for gene prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the gene prioritisation type file. Defaults to GENE_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in gene prioritisation benchmarking outputs. Defaults to GENE_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate gene prioritisation benchmark run results. Defaults to benchmark_gene_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the gene rank comparison file. Defaults to \"-gene_summary.tsv\". \"\"\" prioritisation_type_file_prefix : str = GENE_PLOT_FILE_PREFIX y_label : str = GENE_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_gene_prioritisation stats_comparison_file_suffix : str = \"-gene_summary.tsv\"","title":"GeneBenchmarkRunOutputGenerator"},{"location":"api/pheval/analyse/benchmark_generator/#src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerator","text":"Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing variant prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for variant prioritisation benchmarking. 
Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the variant prioritisation type file. Defaults to VARIANT_PLOT_FILE_PREFIX. y_label str Label for the y-axis in variant prioritisation benchmarking outputs. Defaults to VARIANT_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate variant prioritisation benchmark run results. Defaults to benchmark_variant_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the variant rank comparison file. Defaults to \"-variant_summary.tsv\". Source code in src/pheval/analyse/benchmark_generator.py 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 @dataclass class VariantBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing variant prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for variant prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the variant prioritisation type file. Defaults to VARIANT_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in variant prioritisation benchmarking outputs. Defaults to VARIANT_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate variant prioritisation benchmark run results. Defaults to benchmark_variant_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the variant rank comparison file. Defaults to \"-variant_summary.tsv\". 
\"\"\" prioritisation_type_file_prefix : str = VARIANT_PLOT_FILE_PREFIX y_label : str = VARIANT_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_variant_prioritisation stats_comparison_file_suffix : str = \"-variant_summary.tsv\"","title":"VariantBenchmarkRunOutputGenerator"},{"location":"api/pheval/analyse/benchmarking_data/","text":"BenchmarkRunResults dataclass Benchmarking results for a run. Attributes: Name Type Description ranks dict Dictionary containing recorded ranks for samples. rank_stats RankStats Statistics related to benchmark. results_dir Path Path to the result directory. Defaults to None. benchmark_name str Name of the benchmark run. Defaults to None. Source code in src/pheval/analyse/benchmarking_data.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 @dataclass class BenchmarkRunResults : \"\"\" Benchmarking results for a run. Attributes: ranks (dict): Dictionary containing recorded ranks for samples. rank_stats (RankStats): Statistics related to benchmark. results_dir (Path, optional): Path to the result directory. Defaults to None. benchmark_name (str, optional): Name of the benchmark run. Defaults to None. \"\"\" ranks : dict rank_stats : RankStats binary_classification_stats : BinaryClassificationStats results_dir : Path = None benchmark_name : str = None","title":"Benchmarking data"},{"location":"api/pheval/analyse/benchmarking_data/#src.pheval.analyse.benchmarking_data.BenchmarkRunResults","text":"Benchmarking results for a run. Attributes: Name Type Description ranks dict Dictionary containing recorded ranks for samples. rank_stats RankStats Statistics related to benchmark. results_dir Path Path to the result directory. Defaults to None. benchmark_name str Name of the benchmark run. Defaults to None. 
Source code in src/pheval/analyse/benchmarking_data.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 @dataclass class BenchmarkRunResults : \"\"\" Benchmarking results for a run. Attributes: ranks (dict): Dictionary containing recorded ranks for samples. rank_stats (RankStats): Statistics related to benchmark. results_dir (Path, optional): Path to the result directory. Defaults to None. benchmark_name (str, optional): Name of the benchmark run. Defaults to None. \"\"\" ranks : dict rank_stats : RankStats binary_classification_stats : BinaryClassificationStats results_dir : Path = None benchmark_name : str = None","title":"BenchmarkRunResults"},{"location":"api/pheval/analyse/binary_classification_stats/","text":"BinaryClassificationStats dataclass A data class representing counts of different categories in binary classification. Attributes: Name Type Description true_positives int The count of true positive instances - i.e., the number of known entities ranked 1 in the results. true_negatives int The count of true negative instances - i.e., the number of non-relevant entities ranked at a position other than 1 in the results. false_positives int The count of false positive instances - i.e., the number of non-relevant entities ranked at position 1 in the results. false_negatives int The count of false negative instances - i.e., the number of known entities ranked at a position other than 1 in the results. 
Source code in src/pheval/analyse/binary_classification_stats.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 @dataclass class BinaryClassificationStats : \"\"\" A data class representing counts of different categories in binary classification. Attributes: true_positives (int): The count of true positive instances - i.e., the number of known entities ranked 1 in the results. true_negatives (int): The count of true negative instances - i.e., the number of non-relevant entities ranked at a position other than 1 in the results. false_positives (int): The count of false positive instances - i.e., the number of non-relevant entities ranked at position 1 in the results. false_negatives (int): The count of false negative instances - i.e., the number of known entities ranked at a position other than 1 in the results. 
\"\"\" true_positives : int = 0 true_negatives : int = 0 false_positives : int = 0 false_negatives : int = 0 labels : List = field ( default_factory = list ) scores : List = field ( default_factory = list ) @staticmethod def remove_relevant_ranks ( pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> List [ int ]: \"\"\" Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: List[int]: A list of the ranks with the relevant entity ranks removed. \"\"\" all_result_ranks = [ pheval_result . rank for pheval_result in pheval_results ] for rank in relevant_ranks : if rank in all_result_ranks : all_result_ranks . remove ( rank ) continue return all_result_ranks def add_classification_for_known_entities ( self , relevant_ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for known entities based on their ranking. Args: relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" for rank in relevant_ranks : if rank == 1 : self . true_positives += 1 elif rank != 1 : self . false_negatives += 1 def add_classification_for_other_entities ( self , ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for other entities based on their ranking. Args: ranks (List[int]): A list of the ranks for all other entities. \"\"\" for rank in ranks : if rank == 1 : self . false_positives += 1 elif rank != 1 : self . 
true_negatives += 1 def add_labels_and_scores ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ): \"\"\" Adds scores and labels from the PhEval results. Args: pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): List of all PhEval results relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" relevant_ranks_copy = relevant_ranks . copy () for result in pheval_results : self . scores . append ( result . score ) label = 1 if result . rank in relevant_ranks_copy else 0 self . labels . append ( label ) relevant_ranks_copy . remove ( result . rank ) if label == 1 else None def add_classification ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> None : \"\"\" Update binary classification metrics for known and unknown entities based on their ranks. Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" self . add_classification_for_known_entities ( relevant_ranks ) self . add_classification_for_other_entities ( self . remove_relevant_ranks ( pheval_results , relevant_ranks ) ) self . add_labels_and_scores ( pheval_results , relevant_ranks ) def sensitivity ( self ) -> float : \"\"\" Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: float: The sensitivity of the model, calculated as true positives divided by the sum of true positives and false negatives. Returns 0 if both true positives and false negatives are zero. \"\"\" return ( self . 
true_positives / ( self . true_positives + self . false_negatives ) if ( self . true_positives + self . false_negatives ) > 0 else 0.0 ) def specificity ( self ) -> float : \"\"\" Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: float: The specificity of the model, calculated as true negatives divided by the sum of true negatives and false positives. Returns 0.0 if both true negatives and false positives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_positives ) if ( self . true_negatives + self . false_positives ) > 0 else 0.0 ) def precision ( self ) -> float : \"\"\" Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: float: The precision of the model, calculated as true positives divided by the sum of true positives and false positives. Returns 0.0 if both true positives and false positives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_positives ) if ( self . true_positives + self . false_positives ) > 0 else 0.0 ) def negative_predictive_value ( self ) -> float : \"\"\" Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: float: The Negative Predictive Value of the model, calculated as true negatives divided by the sum of true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_negatives ) if ( self . true_negatives + self . false_negatives ) > 0 else 0.0 ) def false_positive_rate ( self ) -> float : \"\"\" Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. 
Returns: float: The False Positive Rate of the model, calculated as false positives divided by the sum of false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_negatives ) if ( self . false_positives + self . true_negatives ) > 0 else 0.0 ) def false_discovery_rate ( self ) -> float : \"\"\" Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Discovery Rate of the model, calculated as false positives divided by the sum of false positives and true positives. Returns 0.0 if both false positives and true positives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_positives ) if ( self . false_positives + self . true_positives ) > 0 else 0.0 ) def false_negative_rate ( self ) -> float : \"\"\" Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: float: The False Negative Rate of the model, calculated as false negatives divided by the sum of false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. \"\"\" return ( self . false_negatives / ( self . false_negatives + self . true_positives ) if ( self . false_negatives + self . true_positives ) > 0 else 0.0 ) def accuracy ( self ) -> float : \"\"\" Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: float: The Accuracy of the model, calculated as the sum of true positives and true negatives divided by the sum of true positives, false positives, true negatives, and false negatives. Returns 0.0 if the total sum of counts is zero. \"\"\" return ( ( self . true_positives + self . true_negatives ) / ( self . true_positives + self . false_positives + self . 
true_negatives + self . false_negatives ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) def f1_score ( self ) -> float : \"\"\" Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: float: The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). Returns 0.0 if the denominator is zero. \"\"\" return ( ( 2 * self . true_positives ) / (( 2 * self . true_positives ) + self . false_positives + self . false_negatives ) if ( self . true_positives + self . false_positives + self . false_negatives ) > 0 else 0.0 ) def matthews_correlation_coefficient ( self ) -> float : \"\"\" Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: float: The Matthews Correlation Coefficient of the model, calculated as ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). Returns 0.0 if the denominator is zero. \"\"\" return ( ( ( self . true_positives * self . true_negatives ) - ( self . false_positives * self . false_negatives ) ) / ( sqrt ( ( self . true_positives + self . false_positives ) * ( self . true_positives + self . false_negatives ) * ( self . true_negatives + self . false_positives ) * ( self . true_negatives + self . false_negatives ) ) ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) accuracy () Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: Name Type Description float float The Accuracy of the model, calculated as the sum of true positives and true negatives divided by float the sum of true positives, false positives, true negatives, and false negatives. float Returns 0.0 if the total sum of counts is zero. 
Source code in src/pheval/analyse/binary_classification_stats.py 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 def accuracy ( self ) -> float : \"\"\" Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: float: The Accuracy of the model, calculated as the sum of true positives and true negatives divided by the sum of true positives, false positives, true negatives, and false negatives. Returns 0.0 if the total sum of counts is zero. \"\"\" return ( ( self . true_positives + self . true_negatives ) / ( self . true_positives + self . false_positives + self . true_negatives + self . false_negatives ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) add_classification ( pheval_results , relevant_ranks ) Update binary classification metrics for known and unknown entities based on their ranks. Parameters: Name Type Description Default pheval_results Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ]] (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. required relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Source code in src/pheval/analyse/binary_classification_stats.py 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 def add_classification ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> None : \"\"\" Update binary classification metrics for known and unknown entities based on their ranks. 
Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" self . add_classification_for_known_entities ( relevant_ranks ) self . add_classification_for_other_entities ( self . remove_relevant_ranks ( pheval_results , relevant_ranks ) ) self . add_labels_and_scores ( pheval_results , relevant_ranks ) add_classification_for_known_entities ( relevant_ranks ) Update binary classification metrics for known entities based on their ranking. Parameters: Name Type Description Default relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Source code in src/pheval/analyse/binary_classification_stats.py 63 64 65 66 67 68 69 70 71 72 73 74 def add_classification_for_known_entities ( self , relevant_ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for known entities based on their ranking. Args: relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" for rank in relevant_ranks : if rank == 1 : self . true_positives += 1 elif rank != 1 : self . false_negatives += 1 add_classification_for_other_entities ( ranks ) Update binary classification metrics for other entities based on their ranking. Parameters: Name Type Description Default ranks List [ int ] A list of the ranks for all other entities. required Source code in src/pheval/analyse/binary_classification_stats.py 76 77 78 79 80 81 82 83 84 85 86 87 def add_classification_for_other_entities ( self , ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for other entities based on their ranking. Args: ranks (List[int]): A list of the ranks for all other entities. \"\"\" for rank in ranks : if rank == 1 : self . false_positives += 1 elif rank != 1 : self . 
true_negatives += 1 add_labels_and_scores ( pheval_results , relevant_ranks ) Adds scores and labels from the PhEval results. Parameters: Name Type Description Default relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Source code in src/pheval/analyse/binary_classification_stats.py 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 def add_labels_and_scores ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ): \"\"\" Adds scores and labels from the PhEval results. Args: pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): List of all PhEval results relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" relevant_ranks_copy = relevant_ranks . copy () for result in pheval_results : self . scores . append ( result . score ) label = 1 if result . rank in relevant_ranks_copy else 0 self . labels . append ( label ) relevant_ranks_copy . remove ( result . rank ) if label == 1 else None f1_score () Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: Name Type Description float float The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). float Returns 0.0 if the denominator is zero. Source code in src/pheval/analyse/binary_classification_stats.py 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 def f1_score ( self ) -> float : \"\"\" Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: float: The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). Returns 0.0 if the denominator is zero. \"\"\" return ( ( 2 * self . 
true_positives ) / (( 2 * self . true_positives ) + self . false_positives + self . false_negatives ) if ( self . true_positives + self . false_positives + self . false_negatives ) > 0 else 0.0 ) false_discovery_rate () Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: Name Type Description float float The False Discovery Rate of the model, calculated as false positives divided by the sum of float false positives and true positives. Returns 0.0 if both false positives and true positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 def false_discovery_rate ( self ) -> float : \"\"\" Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Discovery Rate of the model, calculated as false positives divided by the sum of false positives and true positives. Returns 0.0 if both false positives and true positives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_positives ) if ( self . false_positives + self . true_positives ) > 0 else 0.0 ) false_negative_rate () Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: Name Type Description float float The False Negative Rate of the model, calculated as false negatives divided by the sum of float false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 def false_negative_rate ( self ) -> float : \"\"\" Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. 
Returns: float: The False Negative Rate of the model, calculated as false negatives divided by the sum of false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. \"\"\" return ( self . false_negatives / ( self . false_negatives + self . true_positives ) if ( self . false_negatives + self . true_positives ) > 0 else 0.0 ) false_positive_rate () Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. Returns: Name Type Description float float The False Positive Rate of the model, calculated as false positives divided by the sum of float false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 def false_positive_rate ( self ) -> float : \"\"\" Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Positive Rate of the model, calculated as false positives divided by the sum of false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_negatives ) if ( self . false_positives + self . true_negatives ) > 0 else 0.0 ) matthews_correlation_coefficient () Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: Name Type Description float float The Matthews Correlation Coefficient of the model, calculated as float ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). float Returns 0.0 if the denominator is zero. 
Source code in src/pheval/analyse/binary_classification_stats.py 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 def matthews_correlation_coefficient ( self ) -> float : \"\"\" Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: float: The Matthews Correlation Coefficient of the model, calculated as ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). Returns 0.0 if the denominator is zero. \"\"\" return ( ( ( self . true_positives * self . true_negatives ) - ( self . false_positives * self . false_negatives ) ) / ( sqrt ( ( self . true_positives + self . false_positives ) * ( self . true_positives + self . false_negatives ) * ( self . true_negatives + self . false_positives ) * ( self . true_negatives + self . false_negatives ) ) ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) negative_predictive_value () Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: Name Type Description float float The Negative Predictive Value of the model, calculated as true negatives divided by the sum of float true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 def negative_predictive_value ( self ) -> float : \"\"\" Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: float: The Negative Predictive Value of the model, calculated as true negatives divided by the sum of true negatives and false negatives. 
Returns 0.0 if both true negatives and false negatives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_negatives ) if ( self . true_negatives + self . false_negatives ) > 0 else 0.0 ) precision () Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: Name Type Description float float The precision of the model, calculated as true positives divided by the sum of true positives float and false positives. Returns 0.0 if both true positives and false positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 def precision ( self ) -> float : \"\"\" Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: float: The precision of the model, calculated as true positives divided by the sum of true positives and false positives. Returns 0.0 if both true positives and false positives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_positives ) if ( self . true_positives + self . false_positives ) > 0 else 0.0 ) remove_relevant_ranks ( pheval_results , relevant_ranks ) staticmethod Remove the relevant entity ranks from all result ranks Parameters: Name Type Description Default pheval_results Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ]] (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. required relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Returns: Type Description List [ int ] List[int]: A list of the ranks with the relevant entity ranks removed. 
Source code in src/pheval/analyse/binary_classification_stats.py 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 @staticmethod def remove_relevant_ranks ( pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> List [ int ]: \"\"\" Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: List[int]: A list of the ranks with the relevant entity ranks removed. \"\"\" all_result_ranks = [ pheval_result . rank for pheval_result in pheval_results ] for rank in relevant_ranks : if rank in all_result_ranks : all_result_ranks . remove ( rank ) continue return all_result_ranks sensitivity () Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: Name Type Description float float The sensitivity of the model, calculated as true positives divided by the sum of true positives float and false negatives. Returns 0 if both true positives and false negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 def sensitivity ( self ) -> float : \"\"\" Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: float: The sensitivity of the model, calculated as true positives divided by the sum of true positives and false negatives. Returns 0 if both true positives and false negatives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_negatives ) if ( self . true_positives + self . 
false_negatives ) > 0 else 0.0 ) specificity () Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: Name Type Description float float The specificity of the model, calculated as true negatives divided by the sum of true negatives float and false positives. Returns 0.0 if both true negatives and false positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 def specificity ( self ) -> float : \"\"\" Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: float: The specificity of the model, calculated as true negatives divided by the sum of true negatives and false positives. Returns 0.0 if both true negatives and false positives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_positives ) if ( self . true_negatives + self . false_positives ) > 0 else 0.0 )","title":"Binary classification stats"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats","text":"A data class representing counts of different categories in binary classification. Attributes: Name Type Description true_positives int The count of true positive instances - i.e., the number of known entities ranked 1 in the results. true_negatives int The count of true negative instances - i.e., the number of non-relevant entities ranked at a position other than 1 in the results. false_positives int The count of false positive instances - i.e., the number of non-relevant entities ranked at position 1 in the results. false_negatives int The count of false negative instances - i.e., the number of known entities ranked at a position other than 1 in the results. 
Source code in src/pheval/analyse/binary_classification_stats.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 @dataclass class BinaryClassificationStats : \"\"\" A data class representing counts of different categories in binary classification. Attributes: true_positives (int): The count of true positive instances - i.e., the number of known entities ranked 1 in the results. true_negatives (int): The count of true negative instances - i.e., the number of non-relevant entities ranked at a position other than 1 in the results. false_positives (int): The count of false positive instances - i.e., the number of non-relevant entities ranked at position 1 in the results. false_negatives (int): The count of false negative instances - i.e., the number of known entities ranked at a position other than 1 in the results. 
\"\"\" true_positives : int = 0 true_negatives : int = 0 false_positives : int = 0 false_negatives : int = 0 labels : List = field ( default_factory = list ) scores : List = field ( default_factory = list ) @staticmethod def remove_relevant_ranks ( pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> List [ int ]: \"\"\" Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: List[int]: A list of the ranks with the relevant entity ranks removed. \"\"\" all_result_ranks = [ pheval_result . rank for pheval_result in pheval_results ] for rank in relevant_ranks : if rank in all_result_ranks : all_result_ranks . remove ( rank ) continue return all_result_ranks def add_classification_for_known_entities ( self , relevant_ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for known entities based on their ranking. Args: relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" for rank in relevant_ranks : if rank == 1 : self . true_positives += 1 elif rank != 1 : self . false_negatives += 1 def add_classification_for_other_entities ( self , ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for other entities based on their ranking. Args: ranks (List[int]): A list of the ranks for all other entities. \"\"\" for rank in ranks : if rank == 1 : self . false_positives += 1 elif rank != 1 : self . 
true_negatives += 1 def add_labels_and_scores ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ): \"\"\" Adds scores and labels from the PhEval results. Args: pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): List of all PhEval results relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" relevant_ranks_copy = relevant_ranks . copy () for result in pheval_results : self . scores . append ( result . score ) label = 1 if result . rank in relevant_ranks_copy else 0 self . labels . append ( label ) relevant_ranks_copy . remove ( result . rank ) if label == 1 else None def add_classification ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> None : \"\"\" Update binary classification metrics for known and unknown entities based on their ranks. Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" self . add_classification_for_known_entities ( relevant_ranks ) self . add_classification_for_other_entities ( self . remove_relevant_ranks ( pheval_results , relevant_ranks ) ) self . add_labels_and_scores ( pheval_results , relevant_ranks ) def sensitivity ( self ) -> float : \"\"\" Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: float: The sensitivity of the model, calculated as true positives divided by the sum of true positives and false negatives. Returns 0 if both true positives and false negatives are zero. \"\"\" return ( self . 
true_positives / ( self . true_positives + self . false_negatives ) if ( self . true_positives + self . false_negatives ) > 0 else 0.0 ) def specificity ( self ) -> float : \"\"\" Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: float: The specificity of the model, calculated as true negatives divided by the sum of true negatives and false positives. Returns 0.0 if both true negatives and false positives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_positives ) if ( self . true_negatives + self . false_positives ) > 0 else 0.0 ) def precision ( self ) -> float : \"\"\" Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: float: The precision of the model, calculated as true positives divided by the sum of true positives and false positives. Returns 0.0 if both true positives and false positives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_positives ) if ( self . true_positives + self . false_positives ) > 0 else 0.0 ) def negative_predictive_value ( self ) -> float : \"\"\" Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: float: The Negative Predictive Value of the model, calculated as true negatives divided by the sum of true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_negatives ) if ( self . true_negatives + self . false_negatives ) > 0 else 0.0 ) def false_positive_rate ( self ) -> float : \"\"\" Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. 
Returns: float: The False Positive Rate of the model, calculated as false positives divided by the sum of false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_negatives ) if ( self . false_positives + self . true_negatives ) > 0 else 0.0 ) def false_discovery_rate ( self ) -> float : \"\"\" Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Discovery Rate of the model, calculated as false positives divided by the sum of false positives and true positives. Returns 0.0 if both false positives and true positives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_positives ) if ( self . false_positives + self . true_positives ) > 0 else 0.0 ) def false_negative_rate ( self ) -> float : \"\"\" Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: float: The False Negative Rate of the model, calculated as false negatives divided by the sum of false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. \"\"\" return ( self . false_negatives / ( self . false_negatives + self . true_positives ) if ( self . false_negatives + self . true_positives ) > 0 else 0.0 ) def accuracy ( self ) -> float : \"\"\" Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: float: The Accuracy of the model, calculated as the sum of true positives and true negatives divided by the sum of true positives, false positives, true negatives, and false negatives. Returns 0.0 if the total sum of counts is zero. \"\"\" return ( ( self . true_positives + self . true_negatives ) / ( self . true_positives + self . false_positives + self . 
true_negatives + self . false_negatives ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) def f1_score ( self ) -> float : \"\"\" Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: float: The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). Returns 0.0 if the denominator is zero. \"\"\" return ( ( 2 * self . true_positives ) / (( 2 * self . true_positives ) + self . false_positives + self . false_negatives ) if ( self . true_positives + self . false_positives + self . false_negatives ) > 0 else 0.0 ) def matthews_correlation_coefficient ( self ) -> float : \"\"\" Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: float: The Matthews Correlation Coefficient of the model, calculated as ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). Returns 0.0 if the denominator is zero. \"\"\" return ( ( ( self . true_positives * self . true_negatives ) - ( self . false_positives * self . false_negatives ) ) / ( sqrt ( ( self . true_positives + self . false_positives ) * ( self . true_positives + self . false_negatives ) * ( self . true_negatives + self . false_positives ) * ( self . true_negatives + self . false_negatives ) ) ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 )","title":"BinaryClassificationStats"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.accuracy","text":"Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. 
Returns: Name Type Description float float The Accuracy of the model, calculated as the sum of true positives and true negatives divided by float the sum of true positives, false positives, true negatives, and false negatives. float Returns 0.0 if the total sum of counts is zero. Source code in src/pheval/analyse/binary_classification_stats.py 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 def accuracy ( self ) -> float : \"\"\" Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: float: The Accuracy of the model, calculated as the sum of true positives and true negatives divided by the sum of true positives, false positives, true negatives, and false negatives. Returns 0.0 if the total sum of counts is zero. \"\"\" return ( ( self . true_positives + self . true_negatives ) / ( self . true_positives + self . false_positives + self . true_negatives + self . false_negatives ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 )","title":"accuracy()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification","text":"Update binary classification metrics for known and unknown entities based on their ranks. Parameters: Name Type Description Default pheval_results Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ]] (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. required relevant_ranks List [ int ] A list of the ranks associated with the known entities. 
required Source code in src/pheval/analyse/binary_classification_stats.py 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 def add_classification ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> None : \"\"\" Update binary classification metrics for known and unknown entities based on their ranks. Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" self . add_classification_for_known_entities ( relevant_ranks ) self . add_classification_for_other_entities ( self . remove_relevant_ranks ( pheval_results , relevant_ranks ) ) self . add_labels_and_scores ( pheval_results , relevant_ranks )","title":"add_classification()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_known_entities","text":"Update binary classification metrics for known entities based on their ranking. Parameters: Name Type Description Default relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Source code in src/pheval/analyse/binary_classification_stats.py 63 64 65 66 67 68 69 70 71 72 73 74 def add_classification_for_known_entities ( self , relevant_ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for known entities based on their ranking. Args: relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" for rank in relevant_ranks : if rank == 1 : self . true_positives += 1 elif rank != 1 : self . 
false_negatives += 1","title":"add_classification_for_known_entities()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_other_entities","text":"Update binary classification metrics for other entities based on their ranking. Parameters: Name Type Description Default ranks List [ int ] A list of the ranks for all other entities. required Source code in src/pheval/analyse/binary_classification_stats.py 76 77 78 79 80 81 82 83 84 85 86 87 def add_classification_for_other_entities ( self , ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for other entities based on their ranking. Args: ranks (List[int]): A list of the ranks for all other entities. \"\"\" for rank in ranks : if rank == 1 : self . false_positives += 1 elif rank != 1 : self . true_negatives += 1","title":"add_classification_for_other_entities()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_labels_and_scores","text":"Adds scores and labels from the PhEval results. Parameters: Name Type Description Default relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Source code in src/pheval/analyse/binary_classification_stats.py 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 def add_labels_and_scores ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ): \"\"\" Adds scores and labels from the PhEval results. Args: pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): List of all PhEval results relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" relevant_ranks_copy = relevant_ranks . 
copy () for result in pheval_results : self . scores . append ( result . score ) label = 1 if result . rank in relevant_ranks_copy else 0 self . labels . append ( label ) relevant_ranks_copy . remove ( result . rank ) if label == 1 else None","title":"add_labels_and_scores()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.f1_score","text":"Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: Name Type Description float float The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). float Returns 0.0 if the denominator is zero. Source code in src/pheval/analyse/binary_classification_stats.py 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 def f1_score ( self ) -> float : \"\"\" Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: float: The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). Returns 0.0 if the denominator is zero. \"\"\" return ( ( 2 * self . true_positives ) / (( 2 * self . true_positives ) + self . false_positives + self . false_negatives ) if ( self . true_positives + self . false_positives + self . false_negatives ) > 0 else 0.0 )","title":"f1_score()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_discovery_rate","text":"Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: Name Type Description float float The False Discovery Rate of the model, calculated as false positives divided by the sum of float false positives and true positives. Returns 0.0 if both false positives and true positives are zero. 
Source code in src/pheval/analyse/binary_classification_stats.py 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 def false_discovery_rate ( self ) -> float : \"\"\" Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Discovery Rate of the model, calculated as false positives divided by the sum of false positives and true positives. Returns 0.0 if both false positives and true positives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_positives ) if ( self . false_positives + self . true_positives ) > 0 else 0.0 )","title":"false_discovery_rate()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_negative_rate","text":"Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: Name Type Description float float The False Negative Rate of the model, calculated as false negatives divided by the sum of float false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 def false_negative_rate ( self ) -> float : \"\"\" Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: float: The False Negative Rate of the model, calculated as false negatives divided by the sum of false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. \"\"\" return ( self . false_negatives / ( self . false_negatives + self . true_positives ) if ( self . false_negatives + self . 
true_positives ) > 0 else 0.0 )","title":"false_negative_rate()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_positive_rate","text":"Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. Returns: Name Type Description float float The False Positive Rate of the model, calculated as false positives divided by the sum of float false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 def false_positive_rate ( self ) -> float : \"\"\" Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Positive Rate of the model, calculated as false positives divided by the sum of false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_negatives ) if ( self . false_positives + self . true_negatives ) > 0 else 0.0 )","title":"false_positive_rate()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.matthews_correlation_coefficient","text":"Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: Name Type Description float float The Matthews Correlation Coefficient of the model, calculated as float ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). float Returns 0.0 if the denominator is zero. 
Source code in src/pheval/analyse/binary_classification_stats.py 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 def matthews_correlation_coefficient ( self ) -> float : \"\"\" Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: float: The Matthews Correlation Coefficient of the model, calculated as ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). Returns 0.0 if the denominator is zero. \"\"\" return ( ( ( self . true_positives * self . true_negatives ) - ( self . false_positives * self . false_negatives ) ) / ( sqrt ( ( self . true_positives + self . false_positives ) * ( self . true_positives + self . false_negatives ) * ( self . true_negatives + self . false_positives ) * ( self . true_negatives + self . false_negatives ) ) ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 )","title":"matthews_correlation_coefficient()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.negative_predictive_value","text":"Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: Name Type Description float float The Negative Predictive Value of the model, calculated as true negatives divided by the sum of float true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 def negative_predictive_value ( self ) -> float : \"\"\" Calculate Negative Predictive Value (NPV). 
NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: float: The Negative Predictive Value of the model, calculated as true negatives divided by the sum of true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_negatives ) if ( self . true_negatives + self . false_negatives ) > 0 else 0.0 )","title":"negative_predictive_value()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.precision","text":"Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: Name Type Description float float The precision of the model, calculated as true positives divided by the sum of true positives float and false positives. Returns 0.0 if both true positives and false positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 def precision ( self ) -> float : \"\"\" Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: float: The precision of the model, calculated as true positives divided by the sum of true positives and false positives. Returns 0.0 if both true positives and false positives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_positives ) if ( self . true_positives + self . 
false_positives ) > 0 else 0.0 )","title":"precision()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.remove_relevant_ranks","text":"Remove the relevant entity ranks from all result ranks Parameters: Name Type Description Default pheval_results Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ]] (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. required relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Returns: Type Description List [ int ] List[int]: A list of the ranks with the relevant entity ranks removed. Source code in src/pheval/analyse/binary_classification_stats.py 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 @staticmethod def remove_relevant_ranks ( pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> List [ int ]: \"\"\" Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: List[int]: A list of the ranks with the relevant entity ranks removed. \"\"\" all_result_ranks = [ pheval_result . rank for pheval_result in pheval_results ] for rank in relevant_ranks : if rank in all_result_ranks : all_result_ranks . remove ( rank ) continue return all_result_ranks","title":"remove_relevant_ranks()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.sensitivity","text":"Calculate sensitivity. 
Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: Name Type Description float float The sensitivity of the model, calculated as true positives divided by the sum of true positives float and false negatives. Returns 0 if both true positives and false negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 def sensitivity ( self ) -> float : \"\"\" Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: float: The sensitivity of the model, calculated as true positives divided by the sum of true positives and false negatives. Returns 0 if both true positives and false negatives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_negatives ) if ( self . true_positives + self . false_negatives ) > 0 else 0.0 )","title":"sensitivity()"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.specificity","text":"Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: Name Type Description float float The specificity of the model, calculated as true negatives divided by the sum of true negatives float and false positives. Returns 0.0 if both true negatives and false positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 def specificity ( self ) -> float : \"\"\" Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: float: The specificity of the model, calculated as true negatives divided by the sum of true negatives and false positives. Returns 0.0 if both true negatives and false positives are zero. 
\"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_positives ) if ( self . true_negatives + self . false_positives ) > 0 else 0.0 )","title":"specificity()"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/","text":"AssessDiseasePrioritisation Class for assessing disease prioritisation based on thresholds and scoring orders. Source code in src/pheval/analyse/disease_prioritisation_analysis.py 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 class AssessDiseasePrioritisation : \"\"\"Class for assessing disease prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_disease_results : List [ RankedPhEvalDiseaseResult ], threshold : float , score_order : str , proband_diseases : List [ ProbandDisease ], ): \"\"\" Initialise AssessDiseasePrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_diseases (List[ProbandDisease]): List of proband diseases \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . 
standardised_disease_results = standardised_disease_results self . threshold = threshold self . score_order = score_order self . proband_diseases = proband_diseases def _record_disease_prioritisation_match ( self , disease : ProbandDisease , result_entry : RankedPhEvalDiseaseResult , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if found within the results Args: disease (ProbandDisease): Diagnosed proband disease result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return DiseasePrioritisationResult ( self . phenopacket_path , disease , rank ) def _assess_disease_with_threshold_ascending_order ( self , result_entry : RankedPhEvalDiseaseResult , disease : ProbandDisease , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if it meets the ascending order threshold. This method checks if the disease prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the disease rank. Args: result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_disease_prioritisation_match ( disease , result_entry , rank_stats ) def _assess_disease_with_threshold ( self , result_entry : RankedPhEvalDiseaseResult , disease : ProbandDisease , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if it meets the score threshold. 
This method checks if the disease prioritisation rank meets the score threshold. If the score of the result entry is greater than the threshold, it records the disease rank. Args: result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_disease_prioritisation_match ( disease , result_entry , rank_stats ) def _record_matched_disease ( self , disease : ProbandDisease , rank_stats : RankStats , standardised_disease_result : RankedPhEvalDiseaseResult , ) -> DiseasePrioritisationResult : \"\"\" Return the disease rank result - handling the specification of a threshold. This method determines and returns the disease rank result based on the specified threshold and score order. If the threshold is 0.0, it records the disease rank directly. Otherwise, it assesses the disease with the threshold based on the score order. Args: disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance standardised_disease_result (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_disease_prioritisation_match ( disease , standardised_disease_result , rank_stats ) else : return ( self . _assess_disease_with_threshold ( standardised_disease_result , disease , rank_stats ) if self . score_order != \"ascending\" else self . 
_assess_disease_with_threshold_ascending_order ( standardised_disease_result , disease , rank_stats ) ) def assess_disease_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation. This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for disease in self . proband_diseases : rank_stats . total += 1 disease_match = DiseasePrioritisationResult ( self . phenopacket_path , disease ) for standardised_disease_result in self . standardised_disease_results : if ( disease . disease_identifier == standardised_disease_result . disease_identifier or disease . disease_name == standardised_disease_result . disease_name ): disease_match = self . _record_matched_disease ( disease , rank_stats , standardised_disease_result ) ( relevant_ranks . append ( disease_match . rank ) if disease_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( DiseasePrioritisationResult ( self . phenopacket_path , disease ) if disease_match is None else disease_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . 
standardised_disease_results , relevant_ranks ) __init__ ( phenopacket_path , results_dir , standardised_disease_results , threshold , score_order , proband_diseases ) Initialise AssessDiseasePrioritisation class Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_disease_results List [ RankedPhEvalDiseaseResult ] List of ranked PhEval disease results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_diseases List [ ProbandDisease ] List of proband diseases required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_disease_results : List [ RankedPhEvalDiseaseResult ], threshold : float , score_order : str , proband_diseases : List [ ProbandDisease ], ): \"\"\" Initialise AssessDiseasePrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_diseases (List[ProbandDisease]): List of proband diseases \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_disease_results = standardised_disease_results self . threshold = threshold self . score_order = score_order self . proband_diseases = proband_diseases assess_disease_prioritisation ( rank_stats , rank_records , binary_classification_stats ) Assess disease prioritisation. 
This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 def assess_disease_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation. This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for disease in self . proband_diseases : rank_stats . total += 1 disease_match = DiseasePrioritisationResult ( self . phenopacket_path , disease ) for standardised_disease_result in self . standardised_disease_results : if ( disease . disease_identifier == standardised_disease_result . disease_identifier or disease . disease_name == standardised_disease_result . disease_name ): disease_match = self . _record_matched_disease ( disease , rank_stats , standardised_disease_result ) ( relevant_ranks . append ( disease_match . rank ) if disease_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( DiseasePrioritisationResult ( self . 
phenopacket_path , disease ) if disease_match is None else disease_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . standardised_disease_results , relevant_ranks ) assess_phenopacket_disease_prioritisation ( phenopacket_path , score_order , results_dir_and_input , threshold , disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats ) Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required disease_rank_stats RankStats RankStats class instance. required disease_rank_comparison defaultdict Default dictionary for disease rank comparisons. required disease_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 def assess_phenopacket_disease_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , disease_rank_stats : RankStats , disease_rank_comparison : defaultdict , disease_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket. 
Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. disease_rank_stats (RankStats): RankStats class instance. disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons. disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" standardised_disease_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_disease_results/ { phenopacket_path . stem } -pheval_disease_result.tsv\" ) pheval_disease_result = read_standardised_result ( standardised_disease_result ) proband_diseases = _obtain_causative_diseases ( phenopacket_path ) AssessDiseasePrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_disease_results/\" ), parse_pheval_result ( RankedPhEvalDiseaseResult , pheval_disease_result ), threshold , score_order , proband_diseases , ) . assess_disease_prioritisation ( disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats ) benchmark_disease_prioritisation ( results_directory_and_input , score_order , threshold , disease_rank_comparison ) Benchmark a directory based on disease prioritisation results. Parameters: Name Type Description Default results_directory_and_input TrackInputOutputDirectories Input and output directories. required score_order str The order in which scores are arranged. required threshold float Threshold for assessment. required disease_rank_comparison defaultdict Default dictionary for disease rank comparisons. required Returns: Name Type Description BenchmarkRunResults An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory. 
Source code in src/pheval/analyse/disease_prioritisation_analysis.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 def benchmark_disease_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , disease_rank_comparison : defaultdict , ): \"\"\" Benchmark a directory based on disease prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory. \"\"\" disease_rank_stats = RankStats () disease_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_disease_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = disease_rank_comparison , rank_stats = disease_rank_stats , binary_classification_stats = disease_binary_classification_stats , )","title":"Disease prioritisation analysis"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation","text":"Class for assessing disease prioritisation based on thresholds and scoring orders. 
Source code in src/pheval/analyse/disease_prioritisation_analysis.py 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 class AssessDiseasePrioritisation : \"\"\"Class for assessing disease prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_disease_results : List [ RankedPhEvalDiseaseResult ], threshold : float , score_order : str , proband_diseases : List [ ProbandDisease ], ): \"\"\" Initialise AssessDiseasePrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_diseases (List[ProbandDisease]): List of proband diseases \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_disease_results = standardised_disease_results self . threshold = threshold self . score_order = score_order self . 
proband_diseases = proband_diseases def _record_disease_prioritisation_match ( self , disease : ProbandDisease , result_entry : RankedPhEvalDiseaseResult , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if found within the results Args: disease (ProbandDisease): Diagnosed proband disease result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return DiseasePrioritisationResult ( self . phenopacket_path , disease , rank ) def _assess_disease_with_threshold_ascending_order ( self , result_entry : RankedPhEvalDiseaseResult , disease : ProbandDisease , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if it meets the ascending order threshold. This method checks if the disease prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the disease rank. Args: result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_disease_prioritisation_match ( disease , result_entry , rank_stats ) def _assess_disease_with_threshold ( self , result_entry : RankedPhEvalDiseaseResult , disease : ProbandDisease , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if it meets the score threshold. This method checks if the disease prioritisation rank meets the score threshold. 
If the score of the result entry is greater than the threshold, it records the disease rank. Args: result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_disease_prioritisation_match ( disease , result_entry , rank_stats ) def _record_matched_disease ( self , disease : ProbandDisease , rank_stats : RankStats , standardised_disease_result : RankedPhEvalDiseaseResult , ) -> DiseasePrioritisationResult : \"\"\" Return the disease rank result - handling the specification of a threshold. This method determines and returns the disease rank result based on the specified threshold and score order. If the threshold is 0.0, it records the disease rank directly. Otherwise, it assesses the disease with the threshold based on the score order. Args: disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance standardised_disease_result (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_disease_prioritisation_match ( disease , standardised_disease_result , rank_stats ) else : return ( self . _assess_disease_with_threshold ( standardised_disease_result , disease , rank_stats ) if self . score_order != \"ascending\" else self . _assess_disease_with_threshold_ascending_order ( standardised_disease_result , disease , rank_stats ) ) def assess_disease_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation. 
This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for disease in self . proband_diseases : rank_stats . total += 1 disease_match = DiseasePrioritisationResult ( self . phenopacket_path , disease ) for standardised_disease_result in self . standardised_disease_results : if ( disease . disease_identifier == standardised_disease_result . disease_identifier or disease . disease_name == standardised_disease_result . disease_name ): disease_match = self . _record_matched_disease ( disease , rank_stats , standardised_disease_result ) ( relevant_ranks . append ( disease_match . rank ) if disease_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( DiseasePrioritisationResult ( self . phenopacket_path , disease ) if disease_match is None else disease_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . 
standardised_disease_results , relevant_ranks )","title":"AssessDiseasePrioritisation"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.__init__","text":"Initialise AssessDiseasePrioritisation class Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_disease_results List [ RankedPhEvalDiseaseResult ] List of ranked PhEval disease results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_diseases List [ ProbandDisease ] List of proband diseases required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_disease_results : List [ RankedPhEvalDiseaseResult ], threshold : float , score_order : str , proband_diseases : List [ ProbandDisease ], ): \"\"\" Initialise AssessDiseasePrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_diseases (List[ProbandDisease]): List of proband diseases \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_disease_results = standardised_disease_results self . threshold = threshold self . score_order = score_order self . 
proband_diseases = proband_diseases","title":"__init__()"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.assess_disease_prioritisation","text":"Assess disease prioritisation. This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 def assess_disease_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation. This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for disease in self . proband_diseases : rank_stats . total += 1 disease_match = DiseasePrioritisationResult ( self . phenopacket_path , disease ) for standardised_disease_result in self . standardised_disease_results : if ( disease . disease_identifier == standardised_disease_result . disease_identifier or disease . disease_name == standardised_disease_result . disease_name ): disease_match = self . 
_record_matched_disease ( disease , rank_stats , standardised_disease_result ) ( relevant_ranks . append ( disease_match . rank ) if disease_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( DiseasePrioritisationResult ( self . phenopacket_path , disease ) if disease_match is None else disease_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . standardised_disease_results , relevant_ranks )","title":"assess_disease_prioritisation()"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_disease_prioritisation","text":"Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required disease_rank_stats RankStats RankStats class instance. required disease_rank_comparison defaultdict Default dictionary for disease rank comparisons. required disease_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. 
required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 def assess_phenopacket_disease_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , disease_rank_stats : RankStats , disease_rank_comparison : defaultdict , disease_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket. Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. disease_rank_stats (RankStats): RankStats class instance. disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons. disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" standardised_disease_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_disease_results/ { phenopacket_path . stem } -pheval_disease_result.tsv\" ) pheval_disease_result = read_standardised_result ( standardised_disease_result ) proband_diseases = _obtain_causative_diseases ( phenopacket_path ) AssessDiseasePrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_disease_results/\" ), parse_pheval_result ( RankedPhEvalDiseaseResult , pheval_disease_result ), threshold , score_order , proband_diseases , ) . 
assess_disease_prioritisation ( disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats )","title":"assess_phenopacket_disease_prioritisation()"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_prioritisation","text":"Benchmark a directory based on disease prioritisation results. Parameters: Name Type Description Default results_directory_and_input TrackInputOutputDirectories Input and output directories. required score_order str The order in which scores are arranged. required threshold float Threshold for assessment. required disease_rank_comparison defaultdict Default dictionary for disease rank comparisons. required Returns: Name Type Description BenchmarkRunResults An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory. Source code in src/pheval/analyse/disease_prioritisation_analysis.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 def benchmark_disease_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , disease_rank_comparison : defaultdict , ): \"\"\" Benchmark a directory based on disease prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory. 
\"\"\" disease_rank_stats = RankStats () disease_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_disease_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = disease_rank_comparison , rank_stats = disease_rank_stats , binary_classification_stats = disease_binary_classification_stats , )","title":"benchmark_disease_prioritisation()"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/","text":"AssessGenePrioritisation Class for assessing gene prioritisation based on thresholds and scoring orders. Source code in src/pheval/analyse/gene_prioritisation_analysis.py 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 class AssessGenePrioritisation : \"\"\"Class for assessing gene prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_gene_results : List [ RankedPhEvalGeneResult ], threshold : float , score_order : str , proband_causative_genes : List [ ProbandCausativeGene ], 
): \"\"\" Initialise AssessGenePrioritisation class. Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_gene_results = standardised_gene_results self . threshold = threshold self . score_order = score_order self . proband_causative_genes = proband_causative_genes def _record_gene_prioritisation_match ( self , gene : ProbandCausativeGene , result_entry : RankedPhEvalGeneResult , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if found within the results Args: gene (ProbandCausativeGene): Diagnosed proband gene result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol , rank ) def _assess_gene_with_threshold_ascending_order ( self , result_entry : RankedPhEvalGeneResult , gene : ProbandCausativeGene , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if it meets the ascending order threshold. This method checks if the gene prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the gene rank. 
Args: result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_gene_prioritisation_match ( gene , result_entry , rank_stats ) def _assess_gene_with_threshold ( self , result_entry : RankedPhEvalGeneResult , gene : ProbandCausativeGene , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if it meets the score threshold. This method checks if the gene prioritisation rank meets the score threshold. If the score of the result entry is greater than the threshold, it records the gene rank. Args: result_entry (RankedPhEvalResult): Ranked PhEval gene result entry gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_gene_prioritisation_match ( gene , result_entry , rank_stats ) def _record_matched_gene ( self , gene : ProbandCausativeGene , rank_stats : RankStats , standardised_gene_result : RankedPhEvalGeneResult , ) -> GenePrioritisationResult : \"\"\" Return the gene rank result - handling the specification of a threshold. This method determines and returns the gene rank result based on the specified threshold and score order. If the threshold is 0.0, it records the gene rank directly. Otherwise, it assesses the gene with the threshold based on the score order. 
Args: gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance standardised_gene_result (RankedPhEvalGeneResult): Ranked PhEval gene result entry Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_gene_prioritisation_match ( gene , standardised_gene_result , rank_stats ) else : return ( self . _assess_gene_with_threshold ( standardised_gene_result , gene , rank_stats ) if self . score_order != \"ascending\" else self . _assess_gene_with_threshold_ascending_order ( standardised_gene_result , gene , rank_stats ) ) @staticmethod def _check_string_representation ( entity : str ) -> Union [ List [ str ], str ]: \"\"\" Check if the input string is a representation of a list and returns the list if true, otherwise the string. Args: entity (str): The input entity to check. Returns: Union[List[str], str]: A list if the input string is a list representation, otherwise the original string. \"\"\" list_pattern = re . compile ( r \"^\\[\\s*(?:[^\\[\\],\\s]+(?:\\s*,\\s*[^\\[\\],\\s]+)*)?\\s*\\]$\" ) if list_pattern . match ( str ( entity )): return ast . literal_eval ( entity ) else : return entity def assess_gene_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for gene in self . proband_causative_genes : rank_stats . total += 1 gene_match = GenePrioritisationResult ( self . phenopacket_path , gene . 
gene_symbol ) for standardised_gene_result in self . standardised_gene_results : gene_identifier = self . _check_string_representation ( standardised_gene_result . gene_identifier ) gene_symbol = self . _check_string_representation ( standardised_gene_result . gene_symbol ) if ( isinstance ( gene_identifier , list ) and gene . gene_identifier in gene_identifier or isinstance ( gene_identifier , str ) and gene . gene_identifier == str or isinstance ( gene_symbol , list ) and gene . gene_symbol in gene_symbol or isinstance ( gene_symbol , str ) and gene . gene_symbol == gene_symbol ): gene_match = self . _record_matched_gene ( gene , rank_stats , standardised_gene_result ) ( relevant_ranks . append ( gene_match . rank ) if gene_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) if gene_match is None else gene_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( pheval_results = self . standardised_gene_results , relevant_ranks = relevant_ranks ) __init__ ( phenopacket_path , results_dir , standardised_gene_results , threshold , score_order , proband_causative_genes ) Initialise AssessGenePrioritisation class. 
Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_gene_results List [ RankedPhEvalGeneResult ] List of ranked PhEval gene results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_causative_genes List [ ProbandCausativeGene ] List of proband causative genes required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_gene_results : List [ RankedPhEvalGeneResult ], threshold : float , score_order : str , proband_causative_genes : List [ ProbandCausativeGene ], ): \"\"\" Initialise AssessGenePrioritisation class. Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_gene_results = standardised_gene_results self . threshold = threshold self . score_order = score_order self . proband_causative_genes = proband_causative_genes assess_gene_prioritisation ( rank_stats , rank_records , binary_classification_stats ) Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. 
required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 def assess_gene_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for gene in self . proband_causative_genes : rank_stats . total += 1 gene_match = GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) for standardised_gene_result in self . standardised_gene_results : gene_identifier = self . _check_string_representation ( standardised_gene_result . gene_identifier ) gene_symbol = self . _check_string_representation ( standardised_gene_result . gene_symbol ) if ( isinstance ( gene_identifier , list ) and gene . gene_identifier in gene_identifier or isinstance ( gene_identifier , str ) and gene . gene_identifier == str or isinstance ( gene_symbol , list ) and gene . gene_symbol in gene_symbol or isinstance ( gene_symbol , str ) and gene . gene_symbol == gene_symbol ): gene_match = self . _record_matched_gene ( gene , rank_stats , standardised_gene_result ) ( relevant_ranks . append ( gene_match . rank ) if gene_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . 
results_dir , ( GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) if gene_match is None else gene_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( pheval_results = self . standardised_gene_results , relevant_ranks = relevant_ranks ) assess_phenopacket_gene_prioritisation ( phenopacket_path , score_order , results_dir_and_input , threshold , gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats ) Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results against the recorded causative genes for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required gene_rank_stats RankStats RankStats class instance. required gene_rank_comparison defaultdict Default dictionary for gene rank comparisons. required gene_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 def assess_phenopacket_gene_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , gene_rank_stats : RankStats , gene_rank_comparison : defaultdict , gene_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results against the recorded causative genes for a proband in the Phenopacket. 
Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. gene_rank_stats (RankStats): RankStats class instance. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" standardised_gene_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_gene_results/ { phenopacket_path . stem } -pheval_gene_result.tsv\" ) pheval_gene_result = read_standardised_result ( standardised_gene_result ) proband_causative_genes = _obtain_causative_genes ( phenopacket_path ) AssessGenePrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_gene_results/\" ), parse_pheval_result ( RankedPhEvalGeneResult , pheval_gene_result ), threshold , score_order , proband_causative_genes , ) . assess_gene_prioritisation ( gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats ) benchmark_gene_prioritisation ( results_directory_and_input , score_order , threshold , gene_rank_comparison ) Benchmark a directory based on gene prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for gene prioritisation, including ranks and rank statistics for the benchmarked directory. 
Source code in src/pheval/analyse/gene_prioritisation_analysis.py 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 def benchmark_gene_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , gene_rank_comparison : defaultdict , ) -> BenchmarkRunResults : \"\"\" Benchmark a directory based on gene prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for gene prioritisation, including ranks and rank statistics for the benchmarked directory. \"\"\" gene_rank_stats = RankStats () gene_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_gene_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = gene_rank_comparison , rank_stats = gene_rank_stats , binary_classification_stats = gene_binary_classification_stats , )","title":"Gene prioritisation analysis"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation","text":"Class for assessing gene prioritisation based on thresholds and scoring orders. 
Source code in src/pheval/analyse/gene_prioritisation_analysis.py 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 class AssessGenePrioritisation : \"\"\"Class for assessing gene prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_gene_results : List [ RankedPhEvalGeneResult ], threshold : float , score_order : str , proband_causative_genes : List [ ProbandCausativeGene ], ): \"\"\" Initialise AssessGenePrioritisation class. Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_gene_results = standardised_gene_results self . threshold = threshold self . score_order = score_order self . 
proband_causative_genes = proband_causative_genes def _record_gene_prioritisation_match ( self , gene : ProbandCausativeGene , result_entry : RankedPhEvalGeneResult , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if found within the results Args: gene (ProbandCausativeGene): Diagnosed proband gene result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol , rank ) def _assess_gene_with_threshold_ascending_order ( self , result_entry : RankedPhEvalGeneResult , gene : ProbandCausativeGene , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if it meets the ascending order threshold. This method checks if the gene prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the gene rank. Args: result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_gene_prioritisation_match ( gene , result_entry , rank_stats ) def _assess_gene_with_threshold ( self , result_entry : RankedPhEvalGeneResult , gene : ProbandCausativeGene , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if it meets the score threshold. This method checks if the gene prioritisation rank meets the score threshold. If the score of the result entry is greater than the threshold, it records the gene rank. 
Args: result_entry (RankedPhEvalResult): Ranked PhEval gene result entry gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_gene_prioritisation_match ( gene , result_entry , rank_stats ) def _record_matched_gene ( self , gene : ProbandCausativeGene , rank_stats : RankStats , standardised_gene_result : RankedPhEvalGeneResult , ) -> GenePrioritisationResult : \"\"\" Return the gene rank result - handling the specification of a threshold. This method determines and returns the gene rank result based on the specified threshold and score order. If the threshold is 0.0, it records the gene rank directly. Otherwise, it assesses the gene with the threshold based on the score order. Args: gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance standardised_gene_result (RankedPhEvalGeneResult): Ranked PhEval gene result entry Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_gene_prioritisation_match ( gene , standardised_gene_result , rank_stats ) else : return ( self . _assess_gene_with_threshold ( standardised_gene_result , gene , rank_stats ) if self . score_order != \"ascending\" else self . _assess_gene_with_threshold_ascending_order ( standardised_gene_result , gene , rank_stats ) ) @staticmethod def _check_string_representation ( entity : str ) -> Union [ List [ str ], str ]: \"\"\" Check if the input string is a representation of a list and returns the list if true, otherwise the string. Args: entity (str): The input entity to check. Returns: Union[List[str], str]: A list if the input string is a list representation, otherwise the original string. \"\"\" list_pattern = re . 
compile ( r \"^\\[\\s*(?:[^\\[\\],\\s]+(?:\\s*,\\s*[^\\[\\],\\s]+)*)?\\s*\\]$\" ) if list_pattern . match ( str ( entity )): return ast . literal_eval ( entity ) else : return entity def assess_gene_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for gene in self . proband_causative_genes : rank_stats . total += 1 gene_match = GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) for standardised_gene_result in self . standardised_gene_results : gene_identifier = self . _check_string_representation ( standardised_gene_result . gene_identifier ) gene_symbol = self . _check_string_representation ( standardised_gene_result . gene_symbol ) if ( isinstance ( gene_identifier , list ) and gene . gene_identifier in gene_identifier or isinstance ( gene_identifier , str ) and gene . gene_identifier == str or isinstance ( gene_symbol , list ) and gene . gene_symbol in gene_symbol or isinstance ( gene_symbol , str ) and gene . gene_symbol == gene_symbol ): gene_match = self . _record_matched_gene ( gene , rank_stats , standardised_gene_result ) ( relevant_ranks . append ( gene_match . rank ) if gene_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) if gene_match is None else gene_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . 
append ( relevant_ranks ) binary_classification_stats . add_classification ( pheval_results = self . standardised_gene_results , relevant_ranks = relevant_ranks )","title":"AssessGenePrioritisation"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.__init__","text":"Initialise AssessGenePrioritisation class. Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_gene_results List [ RankedPhEvalGeneResult ] List of ranked PhEval gene results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_causative_genes List [ ProbandCausativeGene ] List of proband causative genes required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_gene_results : List [ RankedPhEvalGeneResult ], threshold : float , score_order : str , proband_causative_genes : List [ ProbandCausativeGene ], ): \"\"\" Initialise AssessGenePrioritisation class. Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_gene_results = standardised_gene_results self . threshold = threshold self . score_order = score_order self . 
proband_causative_genes = proband_causative_genes","title":"__init__()"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.assess_gene_prioritisation","text":"Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 def assess_gene_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for gene in self . proband_causative_genes : rank_stats . total += 1 gene_match = GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) for standardised_gene_result in self . standardised_gene_results : gene_identifier = self . _check_string_representation ( standardised_gene_result . gene_identifier ) gene_symbol = self . _check_string_representation ( standardised_gene_result . 
gene_symbol ) if ( isinstance ( gene_identifier , list ) and gene . gene_identifier in gene_identifier or isinstance ( gene_identifier , str ) and gene . gene_identifier == str or isinstance ( gene_symbol , list ) and gene . gene_symbol in gene_symbol or isinstance ( gene_symbol , str ) and gene . gene_symbol == gene_symbol ): gene_match = self . _record_matched_gene ( gene , rank_stats , standardised_gene_result ) ( relevant_ranks . append ( gene_match . rank ) if gene_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) if gene_match is None else gene_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( pheval_results = self . standardised_gene_results , relevant_ranks = relevant_ranks )","title":"assess_gene_prioritisation()"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_prioritisation","text":"Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results against the recorded causative genes for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required gene_rank_stats RankStats RankStats class instance. required gene_rank_comparison defaultdict Default dictionary for gene rank comparisons. required gene_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. 
required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 def assess_phenopacket_gene_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , gene_rank_stats : RankStats , gene_rank_comparison : defaultdict , gene_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results against the recorded causative genes for a proband in the Phenopacket. Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. gene_rank_stats (RankStats): RankStats class instance. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" standardised_gene_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_gene_results/ { phenopacket_path . stem } -pheval_gene_result.tsv\" ) pheval_gene_result = read_standardised_result ( standardised_gene_result ) proband_causative_genes = _obtain_causative_genes ( phenopacket_path ) AssessGenePrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_gene_results/\" ), parse_pheval_result ( RankedPhEvalGeneResult , pheval_gene_result ), threshold , score_order , proband_causative_genes , ) . 
assess_gene_prioritisation ( gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats )","title":"assess_phenopacket_gene_prioritisation()"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.benchmark_gene_prioritisation","text":"Benchmark a directory based on gene prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for gene prioritisation, including ranks and rank statistics for the benchmarked directory. Source code in src/pheval/analyse/gene_prioritisation_analysis.py 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 def benchmark_gene_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , gene_rank_comparison : defaultdict , ) -> BenchmarkRunResults : \"\"\" Benchmark a directory based on gene prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for gene prioritisation, including ranks and rank statistics for the benchmarked directory. \"\"\" gene_rank_stats = RankStats () gene_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . 
phenopacket_dir ): assess_phenopacket_gene_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = gene_rank_comparison , rank_stats = gene_rank_stats , binary_classification_stats = gene_binary_classification_stats , )","title":"benchmark_gene_prioritisation()"},{"location":"api/pheval/analyse/generate_plots/","text":"PlotGenerator Class to generate plots. Source code in src/pheval/analyse/generate_plots.py 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 class PlotGenerator : \"\"\"Class to generate plots.\"\"\" palette_hex_codes = [ \"#f4ae3d\" , \"#ee5825\" , \"#2b7288\" , \"#9a84b2\" , \"#0c604c\" , \"#c94c4c\" , \"#3d8e83\" , \"#725ac1\" , \"#e7ba52\" , \"#1b9e77\" , ] def __init__ ( self , ): \"\"\" Initialise the PlotGenerator class. Note: `self.stats` will be used to store statistics data. `self.mrr` will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. \"\"\" self . stats , self . mrr = [], [] matplotlib . rcParams [ \"axes.spines.right\" ] = False matplotlib . rcParams [ \"axes.spines.top\" ] = False @staticmethod def _create_run_identifier ( results_dir : Path ) -> str : \"\"\" Create a run identifier from a path. Args: results_dir (Path): The directory path for results. Returns: str: A string representing the run identifier created from the given path. \"\"\" return f \" { Path ( results_dir ) . parents [ 0 ] . name } _ { trim_corpus_results_directory_suffix ( Path ( results_dir ) . name ) } \" def return_benchmark_name ( self , benchmark_result : BenchmarkRunResults ) -> str : \"\"\" Return the benchmark name for a run. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. Returns: str: The benchmark name obtained from the given BenchmarkRunResults instance. \"\"\" return ( benchmark_result . benchmark_name if benchmark_result . results_dir is None else self . _create_run_identifier ( benchmark_result . 
results_dir ) ) def _generate_stacked_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> None : \"\"\" Generate data in the correct format for dataframe creation for a stacked bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats self . stats . append ( { \"Run\" : self . return_benchmark_name ( benchmark_result ), \"Top\" : benchmark_result . rank_stats . percentage_top (), \"2-3\" : rank_stats . percentage_difference ( rank_stats . percentage_top3 (), rank_stats . percentage_top () ), \"4-5\" : rank_stats . percentage_difference ( rank_stats . percentage_top5 (), rank_stats . percentage_top3 () ), \"6-10\" : rank_stats . percentage_difference ( rank_stats . percentage_top10 (), rank_stats . percentage_top5 () ), \">10\" : rank_stats . percentage_difference ( rank_stats . percentage_found (), rank_stats . percentage_top10 () ), \"Missed\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found ()), } ) def _generate_stats_mrr_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> None : \"\"\" Generate data in the correct format for dataframe creation for MRR (Mean Reciprocal Rank) bar plot, appending to the self.mrr attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" self . mrr . extend ( [ { \"Rank\" : \"MRR\" , \"Percentage\" : benchmark_result . rank_stats . return_mean_reciprocal_rank (), \"Run\" : self . return_benchmark_name ( benchmark_result ), } ] ) def generate_stacked_bar_plot ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. 
benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_stacked_bar_plot_data ( benchmark_result ) self . _generate_stats_mrr_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) stats_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , stacked = True , color = self . palette_hex_codes , ylabel = benchmark_generator . y_label , edgecolor = \"white\" , ) . legend ( loc = \"center left\" , bbox_to_anchor = ( 1.0 , 0.5 )) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 100 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) mrr_df = pd . DataFrame ( self . mrr ) mrr_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , color = self . palette_hex_codes , ylabel = f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } mean reciprocal rank\" , legend = False , edgecolor = \"white\" , ) plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } results - mean reciprocal rank\" ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _mrr.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def _generate_cumulative_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ): \"\"\" Generate data in the correct format for dataframe creation for a cumulative bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats run_identifier = self . 
return_benchmark_name ( benchmark_result ) self . stats . extend ( [ { \"Rank\" : \"Top\" , \"Percentage\" : rank_stats . percentage_top () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top3\" , \"Percentage\" : rank_stats . percentage_top3 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top5\" , \"Percentage\" : rank_stats . percentage_top5 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top10\" , \"Percentage\" : rank_stats . percentage_top10 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Found\" , \"Percentage\" : rank_stats . percentage_found () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Missed\" , \"Percentage\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"MRR\" , \"Percentage\" : rank_stats . return_mean_reciprocal_rank (), \"Run\" : run_identifier , }, ] ) def generate_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . 
prioritisation_type_file_prefix . capitalize () } Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def _generate_non_cumulative_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> [ dict ]: \"\"\" Generate data in the correct format for dataframe creation for a non-cumulative bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats run_identifier = self . return_benchmark_name ( benchmark_result ) self . stats . extend ( [ { \"Rank\" : \"Top\" , \"Percentage\" : rank_stats . percentage_top () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"2-3\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top3 (), rank_stats . percentage_top () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"4-5\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top5 (), rank_stats . percentage_top3 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"6-10\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top10 (), rank_stats . percentage_top5 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \">10\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_found (), rank_stats . percentage_top10 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Missed\" , \"Percentage\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"MRR\" , \"Percentage\" : rank_stats . 
return_mean_reciprocal_rank (), \"Run\" : run_identifier , }, ] ) def generate_roc_curve ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" for i , benchmark_result in enumerate ( benchmarking_results ): fpr , tpr , thresh = roc_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , pos_label = 1 , ) roc_auc = auc ( fpr , tpr ) plt . plot ( fpr , tpr , label = f \" { self . return_benchmark_name ( benchmark_result ) } ROC Curve (AUC = { roc_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"False Positive Rate\" ) plt . ylabel ( \"True Positive Rate\" ) plt . title ( \"Receiver Operating Characteristic (ROC) Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _roc_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def generate_precision_recall ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Precision-Recall curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" plt . 
figure () for i , benchmark_result in enumerate ( benchmarking_results ): precision , recall , thresh = precision_recall_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , ) precision_recall_auc = auc ( recall , precision ) plt . plot ( recall , precision , label = f \" { self . return_benchmark_name ( benchmark_result ) } Precision-Recall Curve \" f \"(AUC = { precision_recall_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"Recall\" ) plt . ylabel ( \"Precision\" ) plt . title ( \"Precision-Recall Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _precision_recall_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def generate_non_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a non-cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_non_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . 
prioritisation_type_file_prefix . capitalize () } Non-Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) __init__ () Initialise the PlotGenerator class. Note self.stats will be used to store statistics data. self.mrr will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. Source code in src/pheval/analyse/generate_plots.py 53 54 55 56 57 58 59 60 61 62 63 64 65 66 def __init__ ( self , ): \"\"\" Initialise the PlotGenerator class. Note: `self.stats` will be used to store statistics data. `self.mrr` will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. \"\"\" self . stats , self . mrr = [], [] matplotlib . rcParams [ \"axes.spines.right\" ] = False matplotlib . rcParams [ \"axes.spines.top\" ] = False generate_cumulative_bar ( benchmarking_results , benchmark_generator , title = None ) Generate a cumulative bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 def generate_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a cumulative bar plot. 
Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) generate_non_cumulative_bar ( benchmarking_results , benchmark_generator , title = None ) Generate a non-cumulative bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. 
None Source code in src/pheval/analyse/generate_plots.py 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 def generate_non_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a non-cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_non_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Non-Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) generate_precision_recall ( benchmarking_results , benchmark_generator ) Generate and plot Precision-Recall curves for binary classification benchmark results. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. 
required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_plots.py 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 def generate_precision_recall ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Precision-Recall curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" plt . figure () for i , benchmark_result in enumerate ( benchmarking_results ): precision , recall , thresh = precision_recall_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , ) precision_recall_auc = auc ( recall , precision ) plt . plot ( recall , precision , label = f \" { self . return_benchmark_name ( benchmark_result ) } Precision-Recall Curve \" f \"(AUC = { precision_recall_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"Recall\" ) plt . ylabel ( \"Precision\" ) plt . title ( \"Precision-Recall Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _precision_recall_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) generate_roc_curve ( benchmarking_results , benchmark_generator ) Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. 
Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_plots.py 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 def generate_roc_curve ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" for i , benchmark_result in enumerate ( benchmarking_results ): fpr , tpr , thresh = roc_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , pos_label = 1 , ) roc_auc = auc ( fpr , tpr ) plt . plot ( fpr , tpr , label = f \" { self . return_benchmark_name ( benchmark_result ) } ROC Curve (AUC = { roc_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"False Positive Rate\" ) plt . ylabel ( \"True Positive Rate\" ) plt . title ( \"Receiver Operating Characteristic (ROC) Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _roc_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) generate_stacked_bar_plot ( benchmarking_results , benchmark_generator , title = None ) Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. 
Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 def generate_stacked_bar_plot ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_stacked_bar_plot_data ( benchmark_result ) self . _generate_stats_mrr_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) stats_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , stacked = True , color = self . palette_hex_codes , ylabel = benchmark_generator . y_label , edgecolor = \"white\" , ) . legend ( loc = \"center left\" , bbox_to_anchor = ( 1.0 , 0.5 )) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 100 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) mrr_df = pd . DataFrame ( self . 
mrr ) mrr_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , color = self . palette_hex_codes , ylabel = f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } mean reciprocal rank\" , legend = False , edgecolor = \"white\" , ) plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } results - mean reciprocal rank\" ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _mrr.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) return_benchmark_name ( benchmark_result ) Return the benchmark name for a run. Parameters: Name Type Description Default benchmark_result BenchmarkRunResults The benchmarking results for a run. required Returns: Name Type Description str str The benchmark name obtained from the given BenchmarkRunResults instance. Source code in src/pheval/analyse/generate_plots.py 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 def return_benchmark_name ( self , benchmark_result : BenchmarkRunResults ) -> str : \"\"\" Return the benchmark name for a run. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. Returns: str: The benchmark name obtained from the given BenchmarkRunResults instance. \"\"\" return ( benchmark_result . benchmark_name if benchmark_result . results_dir is None else self . _create_run_identifier ( benchmark_result . results_dir ) ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , title = None , generate_from_tsv = False ) Generate summary statistics bar plots for prioritisation. This method generates summary statistics bar plots based on the provided benchmarking results and plot type. Parameters: Name Type Description Default benchmarking_results list [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required plot_type str Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). required title str Title for the generated plot. Defaults to None. None generate_from_tsv bool Specify whether to generate plots from the TSV file. Defaults to False. False Source code in src/pheval/analyse/generate_plots.py 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 def generate_plots ( benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , plot_type : str , title : str = None , generate_from_tsv : bool = False , ) -> None : \"\"\" Generate summary statistics bar plots for prioritisation. This method generates summary statistics bar plots based on the provided benchmarking results and plot type. Args: benchmarking_results (list[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. plot_type (str): Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). title (str, optional): Title for the generated plot. Defaults to None. generate_from_tsv (bool): Specify whether to generate plots from the TSV file. Defaults to False. \"\"\" plot_generator = PlotGenerator () if not generate_from_tsv : plot_generator . generate_roc_curve ( benchmarking_results , benchmark_generator ) plot_generator . generate_precision_recall ( benchmarking_results , benchmark_generator ) if plot_type == \"bar_stacked\" : plot_generator . generate_stacked_bar_plot ( benchmarking_results , benchmark_generator , title ) elif plot_type == \"bar_cumulative\" : plot_generator . generate_cumulative_bar ( benchmarking_results , benchmark_generator , title ) elif plot_type == \"bar_non_cumulative\" : plot_generator . 
generate_non_cumulative_bar ( benchmarking_results , benchmark_generator , title ) generate_plots_from_benchmark_summary_tsv ( benchmark_summary_tsv , gene_analysis , variant_analysis , disease_analysis , plot_type , title ) Generate bar plot from summary benchmark results. Reads a summary of benchmark results from a TSV file and generates a bar plot based on the analysis type and plot type. Parameters: Name Type Description Default benchmark_summary_tsv Path Path to the summary TSV file containing benchmark results. required gene_analysis bool Flag indicating whether to analyse gene prioritisation. required variant_analysis bool Flag indicating whether to analyse variant prioritisation. required disease_analysis bool Flag indicating whether to analyse disease prioritisation. required plot_type str Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). required title str Title for the generated plot. required Raises: Type Description ValueError If an unsupported plot type is specified. Source code in src/pheval/analyse/generate_plots.py 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 def generate_plots_from_benchmark_summary_tsv ( benchmark_summary_tsv : Path , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , title : str , ): \"\"\" Generate bar plot from summary benchmark results. Reads a summary of benchmark results from a TSV file and generates a bar plot based on the analysis type and plot type. Args: benchmark_summary_tsv (Path): Path to the summary TSV file containing benchmark results. gene_analysis (bool): Flag indicating whether to analyse gene prioritisation. variant_analysis (bool): Flag indicating whether to analyse variant prioritisation. disease_analysis (bool): Flag indicating whether to analyse disease prioritisation. 
plot_type (str): Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). title (str): Title for the generated plot. Raises: ValueError: If an unsupported plot type is specified. \"\"\" benchmark_stats_summary = read_benchmark_tsv_result_summary ( benchmark_summary_tsv ) benchmarking_results = parse_benchmark_result_summary ( benchmark_stats_summary ) if gene_analysis : benchmark_generator = GeneBenchmarkRunOutputGenerator () elif variant_analysis : benchmark_generator = VariantBenchmarkRunOutputGenerator () elif disease_analysis : benchmark_generator = DiseaseBenchmarkRunOutputGenerator () else : raise ValueError ( \"Specify one analysis type (gene_analysis, variant_analysis, or disease_analysis)\" ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , title , True ) trim_corpus_results_directory_suffix ( corpus_results_directory ) Trim the suffix from the corpus results directory name. Parameters: Name Type Description Default corpus_results_directory Path The directory path containing corpus results. required Returns: Name Type Description Path Path The Path object with the suffix removed from the directory name. Source code in src/pheval/analyse/generate_plots.py 24 25 26 27 28 29 30 31 32 33 34 def trim_corpus_results_directory_suffix ( corpus_results_directory : Path ) -> Path : \"\"\" Trim the suffix from the corpus results directory name. Args: corpus_results_directory (Path): The directory path containing corpus results. Returns: Path: The Path object with the suffix removed from the directory name. \"\"\" return Path ( str ( corpus_results_directory ) . replace ( PHEVAL_RESULTS_DIRECTORY_SUFFIX , \"\" ))","title":"Generate plots"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator","text":"Class to generate plots. 
Source code in src/pheval/analyse/generate_plots.py 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 class PlotGenerator : \"\"\"Class to generate plots.\"\"\" palette_hex_codes = [ \"#f4ae3d\" , \"#ee5825\" , \"#2b7288\" , \"#9a84b2\" , \"#0c604c\" , \"#c94c4c\" , \"#3d8e83\" , \"#725ac1\" , \"#e7ba52\" , \"#1b9e77\" , ] def __init__ ( self , 
): \"\"\" Initialise the PlotGenerator class. Note: `self.stats` will be used to store statistics data. `self.mrr` will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. \"\"\" self . stats , self . mrr = [], [] matplotlib . rcParams [ \"axes.spines.right\" ] = False matplotlib . rcParams [ \"axes.spines.top\" ] = False @staticmethod def _create_run_identifier ( results_dir : Path ) -> str : \"\"\" Create a run identifier from a path. Args: results_dir (Path): The directory path for results. Returns: str: A string representing the run identifier created from the given path. \"\"\" return f \" { Path ( results_dir ) . parents [ 0 ] . name } _ { trim_corpus_results_directory_suffix ( Path ( results_dir ) . name ) } \" def return_benchmark_name ( self , benchmark_result : BenchmarkRunResults ) -> str : \"\"\" Return the benchmark name for a run. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. Returns: str: The benchmark name obtained from the given BenchmarkRunResults instance. \"\"\" return ( benchmark_result . benchmark_name if benchmark_result . results_dir is None else self . _create_run_identifier ( benchmark_result . results_dir ) ) def _generate_stacked_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> None : \"\"\" Generate data in the correct format for dataframe creation for a stacked bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats self . stats . append ( { \"Run\" : self . return_benchmark_name ( benchmark_result ), \"Top\" : benchmark_result . rank_stats . percentage_top (), \"2-3\" : rank_stats . percentage_difference ( rank_stats . percentage_top3 (), rank_stats . percentage_top () ), \"4-5\" : rank_stats . percentage_difference ( rank_stats . percentage_top5 (), rank_stats . 
percentage_top3 () ), \"6-10\" : rank_stats . percentage_difference ( rank_stats . percentage_top10 (), rank_stats . percentage_top5 () ), \">10\" : rank_stats . percentage_difference ( rank_stats . percentage_found (), rank_stats . percentage_top10 () ), \"Missed\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found ()), } ) def _generate_stats_mrr_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> None : \"\"\" Generate data in the correct format for dataframe creation for MRR (Mean Reciprocal Rank) bar plot, appending to the self.mrr attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" self . mrr . extend ( [ { \"Rank\" : \"MRR\" , \"Percentage\" : benchmark_result . rank_stats . return_mean_reciprocal_rank (), \"Run\" : self . return_benchmark_name ( benchmark_result ), } ] ) def generate_stacked_bar_plot ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_stacked_bar_plot_data ( benchmark_result ) self . _generate_stats_mrr_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) stats_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , stacked = True , color = self . palette_hex_codes , ylabel = benchmark_generator . y_label , edgecolor = \"white\" , ) . legend ( loc = \"center left\" , bbox_to_anchor = ( 1.0 , 0.5 )) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . 
capitalize () } Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 100 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) mrr_df = pd . DataFrame ( self . mrr ) mrr_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , color = self . palette_hex_codes , ylabel = f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } mean reciprocal rank\" , legend = False , edgecolor = \"white\" , ) plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } results - mean reciprocal rank\" ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _mrr.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def _generate_cumulative_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ): \"\"\" Generate data in the correct format for dataframe creation for a cumulative bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats run_identifier = self . return_benchmark_name ( benchmark_result ) self . stats . extend ( [ { \"Rank\" : \"Top\" , \"Percentage\" : rank_stats . percentage_top () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top3\" , \"Percentage\" : rank_stats . percentage_top3 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top5\" , \"Percentage\" : rank_stats . percentage_top5 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top10\" , \"Percentage\" : rank_stats . percentage_top10 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Found\" , \"Percentage\" : rank_stats . percentage_found () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Missed\" , \"Percentage\" : rank_stats . percentage_difference ( 100 , rank_stats . 
percentage_found () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"MRR\" , \"Percentage\" : rank_stats . return_mean_reciprocal_rank (), \"Run\" : run_identifier , }, ] ) def generate_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def _generate_non_cumulative_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> [ dict ]: \"\"\" Generate data in the correct format for dataframe creation for a non-cumulative bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats run_identifier = self . 
return_benchmark_name ( benchmark_result ) self . stats . extend ( [ { \"Rank\" : \"Top\" , \"Percentage\" : rank_stats . percentage_top () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"2-3\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top3 (), rank_stats . percentage_top () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"4-5\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top5 (), rank_stats . percentage_top3 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"6-10\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top10 (), rank_stats . percentage_top5 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \">10\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_found (), rank_stats . percentage_top10 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Missed\" , \"Percentage\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"MRR\" , \"Percentage\" : rank_stats . return_mean_reciprocal_rank (), \"Run\" : run_identifier , }, ] ) def generate_roc_curve ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" for i , benchmark_result in enumerate ( benchmarking_results ): fpr , tpr , thresh = roc_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , pos_label = 1 , ) roc_auc = auc ( fpr , tpr ) plt . plot ( fpr , tpr , label = f \" { self . 
return_benchmark_name ( benchmark_result ) } ROC Curve (AUC = { roc_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"False Positive Rate\" ) plt . ylabel ( \"True Positive Rate\" ) plt . title ( \"Receiver Operating Characteristic (ROC) Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _roc_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def generate_precision_recall ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Precision-Recall curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" plt . figure () for i , benchmark_result in enumerate ( benchmarking_results ): precision , recall , thresh = precision_recall_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , ) precision_recall_auc = auc ( recall , precision ) plt . plot ( recall , precision , label = f \" { self . return_benchmark_name ( benchmark_result ) } Precision-Recall Curve \" f \"(AUC = { precision_recall_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"Recall\" ) plt . ylabel ( \"Precision\" ) plt . title ( \"Precision-Recall Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . 
prioritisation_type_file_prefix } _precision_recall_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def generate_non_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a non-cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_non_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Non-Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"PlotGenerator"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.__init__","text":"Initialise the PlotGenerator class. Note self.stats will be used to store statistics data. self.mrr will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. 
Source code in src/pheval/analyse/generate_plots.py 53 54 55 56 57 58 59 60 61 62 63 64 65 66 def __init__ ( self , ): \"\"\" Initialise the PlotGenerator class. Note: `self.stats` will be used to store statistics data. `self.mrr` will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. \"\"\" self . stats , self . mrr = [], [] matplotlib . rcParams [ \"axes.spines.right\" ] = False matplotlib . rcParams [ \"axes.spines.top\" ] = False","title":"__init__()"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar","text":"Generate a cumulative bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 def generate_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . 
catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_cumulative_bar()"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_bar","text":"Generate a non-cumulative bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 def generate_non_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a non-cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. 
\"\"\" for benchmark_result in benchmarking_results : self . _generate_non_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Non-Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_non_cumulative_bar()"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recall","text":"Generate and plot Precision-Recall curves for binary classification benchmark results. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_plots.py 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 def generate_precision_recall ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Precision-Recall curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. 
benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" plt . figure () for i , benchmark_result in enumerate ( benchmarking_results ): precision , recall , thresh = precision_recall_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , ) precision_recall_auc = auc ( recall , precision ) plt . plot ( recall , precision , label = f \" { self . return_benchmark_name ( benchmark_result ) } Precision-Recall Curve \" f \"(AUC = { precision_recall_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"Recall\" ) plt . ylabel ( \"Precision\" ) plt . title ( \"Precision-Recall Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _precision_recall_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_precision_recall()"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve","text":"Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required Source code in src/pheval/analyse/generate_plots.py 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 def generate_roc_curve ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" for i , benchmark_result in enumerate ( benchmarking_results ): fpr , tpr , thresh = roc_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , pos_label = 1 , ) roc_auc = auc ( fpr , tpr ) plt . plot ( fpr , tpr , label = f \" { self . return_benchmark_name ( benchmark_result ) } ROC Curve (AUC = { roc_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"False Positive Rate\" ) plt . ylabel ( \"True Positive Rate\" ) plt . title ( \"Receiver Operating Characteristic (ROC) Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _roc_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_roc_curve()"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plot","text":"Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. 
required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 def generate_stacked_bar_plot ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_stacked_bar_plot_data ( benchmark_result ) self . _generate_stats_mrr_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) stats_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , stacked = True , color = self . palette_hex_codes , ylabel = benchmark_generator . y_label , edgecolor = \"white\" , ) . legend ( loc = \"center left\" , bbox_to_anchor = ( 1.0 , 0.5 )) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 100 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) mrr_df = pd . DataFrame ( self . mrr ) mrr_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , color = self . palette_hex_codes , ylabel = f \" { benchmark_generator . 
prioritisation_type_file_prefix . capitalize () } mean reciprocal rank\" , legend = False , edgecolor = \"white\" , ) plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } results - mean reciprocal rank\" ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _mrr.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_stacked_bar_plot()"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name","text":"Return the benchmark name for a run. Parameters: Name Type Description Default benchmark_result BenchmarkRunResults The benchmarking results for a run. required Returns: Name Type Description str str The benchmark name obtained from the given BenchmarkRunResults instance. Source code in src/pheval/analyse/generate_plots.py 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 def return_benchmark_name ( self , benchmark_result : BenchmarkRunResults ) -> str : \"\"\" Return the benchmark name for a run. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. Returns: str: The benchmark name obtained from the given BenchmarkRunResults instance. \"\"\" return ( benchmark_result . benchmark_name if benchmark_result . results_dir is None else self . _create_run_identifier ( benchmark_result . results_dir ) )","title":"return_benchmark_name()"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.generate_plots","text":"Generate summary statistics bar plots for prioritisation. This method generates summary statistics bar plots based on the provided benchmarking results and plot type. Parameters: Name Type Description Default benchmarking_results list [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required plot_type str Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). required title str Title for the generated plot. Defaults to None. None generate_from_tsv bool Specify whether to generate plots from the TSV file. Defaults to False. False Source code in src/pheval/analyse/generate_plots.py 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 def generate_plots ( benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , plot_type : str , title : str = None , generate_from_tsv : bool = False , ) -> None : \"\"\" Generate summary statistics bar plots for prioritisation. This method generates summary statistics bar plots based on the provided benchmarking results and plot type. Args: benchmarking_results (list[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. plot_type (str): Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). title (str, optional): Title for the generated plot. Defaults to None. generate_from_tsv (bool): Specify whether to generate plots from the TSV file. Defaults to False. \"\"\" plot_generator = PlotGenerator () if not generate_from_tsv : plot_generator . generate_roc_curve ( benchmarking_results , benchmark_generator ) plot_generator . generate_precision_recall ( benchmarking_results , benchmark_generator ) if plot_type == \"bar_stacked\" : plot_generator . generate_stacked_bar_plot ( benchmarking_results , benchmark_generator , title ) elif plot_type == \"bar_cumulative\" : plot_generator . generate_cumulative_bar ( benchmarking_results , benchmark_generator , title ) elif plot_type == \"bar_non_cumulative\" : plot_generator . 
generate_non_cumulative_bar ( benchmarking_results , benchmark_generator , title )","title":"generate_plots()"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_tsv","text":"Generate bar plot from summary benchmark results. Reads a summary of benchmark results from a TSV file and generates a bar plot based on the analysis type and plot type. Parameters: Name Type Description Default benchmark_summary_tsv Path Path to the summary TSV file containing benchmark results. required gene_analysis bool Flag indicating whether to analyse gene prioritisation. required variant_analysis bool Flag indicating whether to analyse variant prioritisation. required disease_analysis bool Flag indicating whether to analyse disease prioritisation. required plot_type str Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). required title str Title for the generated plot. required Raises: Type Description ValueError If an unsupported plot type is specified. Source code in src/pheval/analyse/generate_plots.py 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 def generate_plots_from_benchmark_summary_tsv ( benchmark_summary_tsv : Path , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , title : str , ): \"\"\" Generate bar plot from summary benchmark results. Reads a summary of benchmark results from a TSV file and generates a bar plot based on the analysis type and plot type. Args: benchmark_summary_tsv (Path): Path to the summary TSV file containing benchmark results. gene_analysis (bool): Flag indicating whether to analyse gene prioritisation. variant_analysis (bool): Flag indicating whether to analyse variant prioritisation. disease_analysis (bool): Flag indicating whether to analyse disease prioritisation. 
plot_type (str): Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). title (str): Title for the generated plot. Raises: ValueError: If an unsupported plot type is specified. \"\"\" benchmark_stats_summary = read_benchmark_tsv_result_summary ( benchmark_summary_tsv ) benchmarking_results = parse_benchmark_result_summary ( benchmark_stats_summary ) if gene_analysis : benchmark_generator = GeneBenchmarkRunOutputGenerator () elif variant_analysis : benchmark_generator = VariantBenchmarkRunOutputGenerator () elif disease_analysis : benchmark_generator = DiseaseBenchmarkRunOutputGenerator () else : raise ValueError ( \"Specify one analysis type (gene_analysis, variant_analysis, or disease_analysis)\" ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , title , True )","title":"generate_plots_from_benchmark_summary_tsv()"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix","text":"Trim the suffix from the corpus results directory name. Parameters: Name Type Description Default corpus_results_directory Path The directory path containing corpus results. required Returns: Name Type Description Path Path The Path object with the suffix removed from the directory name. Source code in src/pheval/analyse/generate_plots.py 24 25 26 27 28 29 30 31 32 33 34 def trim_corpus_results_directory_suffix ( corpus_results_directory : Path ) -> Path : \"\"\" Trim the suffix from the corpus results directory name. Args: corpus_results_directory (Path): The directory path containing corpus results. Returns: Path: The Path object with the suffix removed from the directory name. \"\"\" return Path ( str ( corpus_results_directory ) . 
replace ( PHEVAL_RESULTS_DIRECTORY_SUFFIX , \"\" ))","title":"trim_corpus_results_directory_suffix()"},{"location":"api/pheval/analyse/generate_summary_outputs/","text":"RankComparisonGenerator Class for writing the run comparison of rank assignment for prioritisation. Source code in src/pheval/analyse/generate_summary_outputs.py 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 class RankComparisonGenerator : \"\"\"Class for writing the run comparison of rank assignment for prioritisation.\"\"\" def __init__ ( self , run_comparison : defaultdict ): \"\"\" Initialise the RankComparisonGenerator class. Args: run_comparison (defaultdict): A nested dictionary containing the run comparison data. \"\"\" self . run_comparison = run_comparison def _generate_dataframe ( self ) -> pd . DataFrame : \"\"\" Generate a Pandas DataFrame based on the run comparison data. Returns: pd.DataFrame: DataFrame containing the run comparison data. \"\"\" return pd . DataFrame . from_dict ( self . run_comparison , orient = \"index\" ) def _calculate_rank_difference ( self ) -> pd . DataFrame : \"\"\" Calculate the rank decrease for runs, taking the first directory as a baseline. Returns: pd.DataFrame: DataFrame containing the calculated rank differences. \"\"\" comparison_df = self . _generate_dataframe () comparison_df [ \"rank_change\" ] = comparison_df . iloc [:, 2 ] - comparison_df . iloc [:, 3 ] comparison_df [ \"rank_change\" ] = np . where ( ( comparison_df . iloc [:, 2 ] == 0 ) & ( comparison_df . iloc [:, 3 ] != 0 ), \"GAINED\" , np . where ( ( comparison_df . iloc [:, 3 ] == 0 ) & ( comparison_df . iloc [:, 2 ] != 0 ), \"LOST\" , comparison_df [ \"rank_change\" ], ), ) comparison_df [ \"rank_change\" ] = comparison_df [ \"rank_change\" ] . apply ( lambda x : int ( x ) if str ( x ) . lstrip ( \"-\" ) . 
isdigit () else x ) return comparison_df def generate_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file from the run comparison data. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _generate_dataframe () . to_csv ( prefix + suffix , sep = \" \\t \" ) def generate_comparison_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file with calculated rank differences. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _calculate_rank_difference () . to_csv ( prefix + suffix , sep = \" \\t \" ) __init__ ( run_comparison ) Initialise the RankComparisonGenerator class. Parameters: Name Type Description Default run_comparison defaultdict A nested dictionary containing the run comparison data. required Source code in src/pheval/analyse/generate_summary_outputs.py 18 19 20 21 22 23 24 25 def __init__ ( self , run_comparison : defaultdict ): \"\"\" Initialise the RankComparisonGenerator class. Args: run_comparison (defaultdict): A nested dictionary containing the run comparison data. \"\"\" self . run_comparison = run_comparison generate_comparison_output ( prefix , suffix ) Generate output file with calculated rank differences. Parameters: Name Type Description Default prefix str Prefix for the output file name. required suffix str Suffix for the output file name. required Source code in src/pheval/analyse/generate_summary_outputs.py 69 70 71 72 73 74 75 76 77 def generate_comparison_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file with calculated rank differences. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _calculate_rank_difference () . to_csv ( prefix + suffix , sep = \" \\t \" ) generate_output ( prefix , suffix ) Generate output file from the run comparison data. 
Parameters: Name Type Description Default prefix str Prefix for the output file name. required suffix str Suffix for the output file name. required Source code in src/pheval/analyse/generate_summary_outputs.py 59 60 61 62 63 64 65 66 67 def generate_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file from the run comparison data. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _generate_dataframe () . to_csv ( prefix + suffix , sep = \" \\t \" ) generate_benchmark_comparison_output ( benchmarking_results , plot_type , benchmark_generator ) Generate prioritisation outputs for benchmarking multiple runs. This function generates comparison outputs for benchmarking multiple runs. It compares the results between pairs of BenchmarkRunResults instances in benchmarking_results and generates rank comparison outputs using RankComparisonGenerator for each pair. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] A list containing BenchmarkRunResults instances representing the benchmarking results of multiple runs. required plot_type str The type of plot to be generated. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_summary_outputs.py 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 def generate_benchmark_comparison_output ( benchmarking_results : List [ BenchmarkRunResults ], plot_type : str , benchmark_generator : BenchmarkRunOutputGenerator , ) -> None : \"\"\" Generate prioritisation outputs for benchmarking multiple runs. This function generates comparison outputs for benchmarking multiple runs. 
It compares the results between pairs of `BenchmarkRunResults` instances in `benchmarking_results` and generates rank comparison outputs using `RankComparisonGenerator` for each pair. Args: benchmarking_results (List[BenchmarkRunResults]): A list containing BenchmarkRunResults instances representing the benchmarking results of multiple runs. plot_type (str): The type of plot to be generated. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" output_prefix = benchmark_generator . prioritisation_type_file_prefix for pair in itertools . combinations ( benchmarking_results , 2 ): result1 = pair [ 0 ] result2 = pair [ 1 ] merged_results = merge_results ( deepcopy ( result1 . ranks ), deepcopy ( result2 . ranks ), ) RankComparisonGenerator ( merged_results ) . generate_comparison_output ( f \" { result1 . results_dir . parents [ 0 ] . name } _\" f \" { result1 . results_dir . name } \" f \"_vs_ { result2 . results_dir . parents [ 0 ] . name } _\" f \" { result2 . results_dir . name } \" , f \"- { output_prefix }{ RANK_COMPARISON_FILE_SUFFIX } \" , ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , ) generate_benchmark_output ( benchmarking_results , plot_type , benchmark_generator ) Generate prioritisation outputs for a single benchmarking run. Parameters: Name Type Description Default benchmarking_results BenchmarkRunResults Results of a benchmarking run. required plot_type str Type of plot to generate. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required Source code in src/pheval/analyse/generate_summary_outputs.py 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 def generate_benchmark_output ( benchmarking_results : BenchmarkRunResults , plot_type : str , benchmark_generator : BenchmarkRunOutputGenerator , ) -> None : \"\"\" Generate prioritisation outputs for a single benchmarking run. Args: benchmarking_results (BenchmarkRunResults): Results of a benchmarking run. plot_type (str): Type of plot to generate. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" rank_comparison_data = benchmarking_results . ranks results_dir_name = benchmarking_results . results_dir . name RankComparisonGenerator ( rank_comparison_data ) . generate_output ( f \" { results_dir_name } \" , f \"- { benchmark_generator . prioritisation_type_file_prefix }{ RANK_COMPARISON_FILE_SUFFIX } \" , ) generate_plots ( [ benchmarking_results ], benchmark_generator , plot_type , ) merge_results ( result1 , result2 ) Merge two nested dictionaries containing results on commonalities. This function merges two dictionaries, result1 and result2 , containing nested structures. It traverses the dictionaries recursively and merges their contents based on common keys. If a key is present in both dictionaries and points to another dictionary, the function will further merge their nested contents. If a key exists in result2 but not in result1 , it will be added to result1 . Parameters: Name Type Description Default result1 dict The first dictionary to be merged. required result2 dict The second dictionary to be merged. required Returns: Name Type Description defaultdict defaultdict The merged dictionary containing the combined contents of result1 and result2 . 
Source code in src/pheval/analyse/generate_summary_outputs.py 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 def merge_results ( result1 : dict , result2 : dict ) -> defaultdict : \"\"\" Merge two nested dictionaries containing results on commonalities. This function merges two dictionaries, `result1` and `result2`, containing nested structures. It traverses the dictionaries recursively and merges their contents based on common keys. If a key is present in both dictionaries and points to another dictionary, the function will further merge their nested contents. If a key exists in `result2` but not in `result1`, it will be added to `result1`. Args: result1 (dict): The first dictionary to be merged. result2 (dict): The second dictionary to be merged. Returns: defaultdict: The merged dictionary containing the combined contents of `result1` and `result2`. \"\"\" for key , val in result1 . items (): if type ( val ) == dict : if key in result2 and type ( result2 [ key ] == dict ): merge_results ( result1 [ key ], result2 [ key ]) else : if key in result2 : result1 [ key ] = result2 [ key ] for key , val in result2 . items (): if key not in result1 : result1 [ key ] = val return result1","title":"Generate summary outputs"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator","text":"Class for writing the run comparison of rank assignment for prioritisation. Source code in src/pheval/analyse/generate_summary_outputs.py 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 class RankComparisonGenerator : \"\"\"Class for writing the run comparison of rank assignment for prioritisation.\"\"\" def __init__ ( self , run_comparison : defaultdict ): \"\"\" Initialise the RankComparisonGenerator class. 
Args: run_comparison (defaultdict): A nested dictionary containing the run comparison data. \"\"\" self . run_comparison = run_comparison def _generate_dataframe ( self ) -> pd . DataFrame : \"\"\" Generate a Pandas DataFrame based on the run comparison data. Returns: pd.DataFrame: DataFrame containing the run comparison data. \"\"\" return pd . DataFrame . from_dict ( self . run_comparison , orient = \"index\" ) def _calculate_rank_difference ( self ) -> pd . DataFrame : \"\"\" Calculate the rank decrease for runs, taking the first directory as a baseline. Returns: pd.DataFrame: DataFrame containing the calculated rank differences. \"\"\" comparison_df = self . _generate_dataframe () comparison_df [ \"rank_change\" ] = comparison_df . iloc [:, 2 ] - comparison_df . iloc [:, 3 ] comparison_df [ \"rank_change\" ] = np . where ( ( comparison_df . iloc [:, 2 ] == 0 ) & ( comparison_df . iloc [:, 3 ] != 0 ), \"GAINED\" , np . where ( ( comparison_df . iloc [:, 3 ] == 0 ) & ( comparison_df . iloc [:, 2 ] != 0 ), \"LOST\" , comparison_df [ \"rank_change\" ], ), ) comparison_df [ \"rank_change\" ] = comparison_df [ \"rank_change\" ] . apply ( lambda x : int ( x ) if str ( x ) . lstrip ( \"-\" ) . isdigit () else x ) return comparison_df def generate_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file from the run comparison data. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _generate_dataframe () . to_csv ( prefix + suffix , sep = \" \\t \" ) def generate_comparison_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file with calculated rank differences. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _calculate_rank_difference () . 
to_csv ( prefix + suffix , sep = \" \\t \" )","title":"RankComparisonGenerator"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.__init__","text":"Initialise the RankComparisonGenerator class. Parameters: Name Type Description Default run_comparison defaultdict A nested dictionary containing the run comparison data. required Source code in src/pheval/analyse/generate_summary_outputs.py 18 19 20 21 22 23 24 25 def __init__ ( self , run_comparison : defaultdict ): \"\"\" Initialise the RankComparisonGenerator class. Args: run_comparison (defaultdict): A nested dictionary containing the run comparison data. \"\"\" self . run_comparison = run_comparison","title":"__init__()"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_comparison_output","text":"Generate output file with calculated rank differences. Parameters: Name Type Description Default prefix str Prefix for the output file name. required suffix str Suffix for the output file name. required Source code in src/pheval/analyse/generate_summary_outputs.py 69 70 71 72 73 74 75 76 77 def generate_comparison_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file with calculated rank differences. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _calculate_rank_difference () . to_csv ( prefix + suffix , sep = \" \\t \" )","title":"generate_comparison_output()"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_output","text":"Generate output file from the run comparison data. Parameters: Name Type Description Default prefix str Prefix for the output file name. required suffix str Suffix for the output file name. 
required Source code in src/pheval/analyse/generate_summary_outputs.py 59 60 61 62 63 64 65 66 67 def generate_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file from the run comparison data. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _generate_dataframe () . to_csv ( prefix + suffix , sep = \" \\t \" )","title":"generate_output()"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.generate_benchmark_comparison_output","text":"Generate prioritisation outputs for benchmarking multiple runs. This function generates comparison outputs for benchmarking multiple runs. It compares the results between pairs of BenchmarkRunResults instances in benchmarking_results and generates rank comparison outputs using RankComparisonGenerator for each pair. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] A list containing BenchmarkRunResults instances representing the benchmarking results of multiple runs. required plot_type str The type of plot to be generated. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_summary_outputs.py 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 def generate_benchmark_comparison_output ( benchmarking_results : List [ BenchmarkRunResults ], plot_type : str , benchmark_generator : BenchmarkRunOutputGenerator , ) -> None : \"\"\" Generate prioritisation outputs for benchmarking multiple runs. This function generates comparison outputs for benchmarking multiple runs. 
It compares the results between pairs of `BenchmarkRunResults` instances in `benchmarking_results` and generates rank comparison outputs using `RankComparisonGenerator` for each pair. Args: benchmarking_results (List[BenchmarkRunResults]): A list containing BenchmarkRunResults instances representing the benchmarking results of multiple runs. plot_type (str): The type of plot to be generated. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" output_prefix = benchmark_generator . prioritisation_type_file_prefix for pair in itertools . combinations ( benchmarking_results , 2 ): result1 = pair [ 0 ] result2 = pair [ 1 ] merged_results = merge_results ( deepcopy ( result1 . ranks ), deepcopy ( result2 . ranks ), ) RankComparisonGenerator ( merged_results ) . generate_comparison_output ( f \" { result1 . results_dir . parents [ 0 ] . name } _\" f \" { result1 . results_dir . name } \" f \"_vs_ { result2 . results_dir . parents [ 0 ] . name } _\" f \" { result2 . results_dir . name } \" , f \"- { output_prefix }{ RANK_COMPARISON_FILE_SUFFIX } \" , ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , )","title":"generate_benchmark_comparison_output()"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.generate_benchmark_output","text":"Generate prioritisation outputs for a single benchmarking run. Parameters: Name Type Description Default benchmarking_results BenchmarkRunResults Results of a benchmarking run. required plot_type str Type of plot to generate. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required Source code in src/pheval/analyse/generate_summary_outputs.py 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 def generate_benchmark_output ( benchmarking_results : BenchmarkRunResults , plot_type : str , benchmark_generator : BenchmarkRunOutputGenerator , ) -> None : \"\"\" Generate prioritisation outputs for a single benchmarking run. Args: benchmarking_results (BenchmarkRunResults): Results of a benchmarking run. plot_type (str): Type of plot to generate. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" rank_comparison_data = benchmarking_results . ranks results_dir_name = benchmarking_results . results_dir . name RankComparisonGenerator ( rank_comparison_data ) . generate_output ( f \" { results_dir_name } \" , f \"- { benchmark_generator . prioritisation_type_file_prefix }{ RANK_COMPARISON_FILE_SUFFIX } \" , ) generate_plots ( [ benchmarking_results ], benchmark_generator , plot_type , )","title":"generate_benchmark_output()"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.merge_results","text":"Merge two nested dictionaries containing results on commonalities. This function merges two dictionaries, result1 and result2 , containing nested structures. It traverses the dictionaries recursively and merges their contents based on common keys. If a key is present in both dictionaries and points to another dictionary, the function will further merge their nested contents. If a key exists in result2 but not in result1 , it will be added to result1 . Parameters: Name Type Description Default result1 dict The first dictionary to be merged. required result2 dict The second dictionary to be merged. required Returns: Name Type Description defaultdict defaultdict The merged dictionary containing the combined contents of result1 and result2 . 
Source code in src/pheval/analyse/generate_summary_outputs.py 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 def merge_results ( result1 : dict , result2 : dict ) -> defaultdict : \"\"\" Merge two nested dictionaries containing results on commonalities. This function merges two dictionaries, `result1` and `result2`, containing nested structures. It traverses the dictionaries recursively and merges their contents based on common keys. If a key is present in both dictionaries and points to another dictionary, the function will further merge their nested contents. If a key exists in `result2` but not in `result1`, it will be added to `result1`. Args: result1 (dict): The first dictionary to be merged. result2 (dict): The second dictionary to be merged. Returns: defaultdict: The merged dictionary containing the combined contents of `result1` and `result2`. \"\"\" for key , val in result1 . items (): if type ( val ) == dict : if key in result2 and type ( result2 [ key ] == dict ): merge_results ( result1 [ key ], result2 [ key ]) else : if key in result2 : result1 [ key ] = result2 [ key ] for key , val in result2 . items (): if key not in result1 : result1 [ key ] = val return result1","title":"merge_results()"},{"location":"api/pheval/analyse/parse_benchmark_summary/","text":"parse_benchmark_result_summary ( benchmarking_df ) Parse the summary benchmark DataFrame into a list of BenchmarkRunResults. Parameters: Name Type Description Default benchmarking_df pd . DataFrame Summary benchmark DataFrame containing columns such as 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. required Returns: Type Description List [ BenchmarkRunResults ] List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame. 
Source code in src/pheval/analyse/parse_benchmark_summary.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 def parse_benchmark_result_summary ( benchmarking_df : pd . DataFrame ) -> List [ BenchmarkRunResults ]: \"\"\" Parse the summary benchmark DataFrame into a list of BenchmarkRunResults. Args: benchmarking_df (pd.DataFrame): Summary benchmark DataFrame containing columns such as 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. Returns: List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame. \"\"\" benchmarking_results = [] for _ , row in benchmarking_df . iterrows (): benchmarking_result = BenchmarkRunResults ( rank_stats = RankStats ( top = row [ \"top\" ], top3 = row [ \"top3\" ], top5 = row [ \"top5\" ], top10 = row [ \"top10\" ], found = row [ \"found\" ], total = row [ \"total\" ], mrr = row [ \"mean_reciprocal_rank\" ], ), ranks = {}, benchmark_name = row [ \"results_directory_path\" ], binary_classification_stats = BinaryClassificationStats (), ) benchmarking_results . append ( benchmarking_result ) return benchmarking_results read_benchmark_tsv_result_summary ( benchmarking_tsv ) Read the summary benchmark TSV output generated from the benchmark-comparison command. Parameters: Name Type Description Default benchmarking_tsv Path Path to the summary benchmark TSV output file. required Returns: Type Description pd . DataFrame pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including: 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. Source code in src/pheval/analyse/parse_benchmark_summary.py 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 def read_benchmark_tsv_result_summary ( benchmarking_tsv : Path ) -> pd . 
DataFrame : \"\"\" Read the summary benchmark TSV output generated from the benchmark-comparison command. Args: benchmarking_tsv (Path): Path to the summary benchmark TSV output file. Returns: pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including: 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. \"\"\" return pd . read_csv ( benchmarking_tsv , delimiter = \" \\t \" , usecols = [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , ], )","title":"Parse benchmark summary"},{"location":"api/pheval/analyse/parse_benchmark_summary/#src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summary","text":"Parse the summary benchmark DataFrame into a list of BenchmarkRunResults. Parameters: Name Type Description Default benchmarking_df pd . DataFrame Summary benchmark DataFrame containing columns such as 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. required Returns: Type Description List [ BenchmarkRunResults ] List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame. Source code in src/pheval/analyse/parse_benchmark_summary.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 def parse_benchmark_result_summary ( benchmarking_df : pd . DataFrame ) -> List [ BenchmarkRunResults ]: \"\"\" Parse the summary benchmark DataFrame into a list of BenchmarkRunResults. Args: benchmarking_df (pd.DataFrame): Summary benchmark DataFrame containing columns such as 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. Returns: List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame. \"\"\" benchmarking_results = [] for _ , row in benchmarking_df . 
iterrows (): benchmarking_result = BenchmarkRunResults ( rank_stats = RankStats ( top = row [ \"top\" ], top3 = row [ \"top3\" ], top5 = row [ \"top5\" ], top10 = row [ \"top10\" ], found = row [ \"found\" ], total = row [ \"total\" ], mrr = row [ \"mean_reciprocal_rank\" ], ), ranks = {}, benchmark_name = row [ \"results_directory_path\" ], binary_classification_stats = BinaryClassificationStats (), ) benchmarking_results . append ( benchmarking_result ) return benchmarking_results","title":"parse_benchmark_result_summary()"},{"location":"api/pheval/analyse/parse_benchmark_summary/#src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_summary","text":"Read the summary benchmark TSV output generated from the benchmark-comparison command. Parameters: Name Type Description Default benchmarking_tsv Path Path to the summary benchmark TSV output file. required Returns: Type Description pd . DataFrame pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including: 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. Source code in src/pheval/analyse/parse_benchmark_summary.py 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 def read_benchmark_tsv_result_summary ( benchmarking_tsv : Path ) -> pd . DataFrame : \"\"\" Read the summary benchmark TSV output generated from the benchmark-comparison command. Args: benchmarking_tsv (Path): Path to the summary benchmark TSV output file. Returns: pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including: 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. \"\"\" return pd . 
read_csv ( benchmarking_tsv , delimiter = \" \\t \" , usecols = [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , ], )","title":"read_benchmark_tsv_result_summary()"},{"location":"api/pheval/analyse/parse_pheval_result/","text":"parse_pheval_result ( data_class_type , pheval_result ) Parse PhEval result into specified dataclass type. Parameters: Name Type Description Default data_class_type PhEvalResult The data class type to parse the result into. required pheval_result List [ dict ] A list of dictionaries representing the PhEval result. required Returns: Type Description List [ PhEvalResult ] List[PhEvalResult]: A list of instances of the specified data class type, List [ PhEvalResult ] each instance representing a row in the PhEval result. Source code in src/pheval/analyse/parse_pheval_result.py 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 def parse_pheval_result ( data_class_type : PhEvalResult , pheval_result : List [ dict ] ) -> List [ PhEvalResult ]: \"\"\" Parse PhEval result into specified dataclass type. Args: data_class_type (PhEvalResult): The data class type to parse the result into. pheval_result (List[dict]): A list of dictionaries representing the PhEval result. Returns: List[PhEvalResult]: A list of instances of the specified data class type, each instance representing a row in the PhEval result. \"\"\" return [ data_class_type ( ** row ) for row in pheval_result ] read_standardised_result ( standardised_result_path ) Read the standardised result output and return a list of dictionaries. Parameters: Name Type Description Default standardised_result_path Path The path to the file containing the standardised result output. required Returns: Type Description List [ dict ] List[dict]: A list of dictionaries representing the content of the standardised result file. 
Source code in src/pheval/analyse/parse_pheval_result.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 def read_standardised_result ( standardised_result_path : Path ) -> List [ dict ]: \"\"\" Read the standardised result output and return a list of dictionaries. Args: standardised_result_path (Path): The path to the file containing the standardised result output. Returns: List[dict]: A list of dictionaries representing the content of the standardised result file. \"\"\" if standardised_result_path . is_file (): return pd . read_csv ( standardised_result_path , delimiter = \" \\t \" ) . to_dict ( \"records\" ) else : info_log . info ( f \"Could not find { standardised_result_path } \" ) return pd . DataFrame () . to_dict ( \"records\" )","title":"Parse pheval result"},{"location":"api/pheval/analyse/parse_pheval_result/#src.pheval.analyse.parse_pheval_result.parse_pheval_result","text":"Parse PhEval result into specified dataclass type. Parameters: Name Type Description Default data_class_type PhEvalResult The data class type to parse the result into. required pheval_result List [ dict ] A list of dictionaries representing the PhEval result. required Returns: Type Description List [ PhEvalResult ] List[PhEvalResult]: A list of instances of the specified data class type, List [ PhEvalResult ] each instance representing a row in the PhEval result. Source code in src/pheval/analyse/parse_pheval_result.py 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 def parse_pheval_result ( data_class_type : PhEvalResult , pheval_result : List [ dict ] ) -> List [ PhEvalResult ]: \"\"\" Parse PhEval result into specified dataclass type. Args: data_class_type (PhEvalResult): The data class type to parse the result into. pheval_result (List[dict]): A list of dictionaries representing the PhEval result. Returns: List[PhEvalResult]: A list of instances of the specified data class type, each instance representing a row in the PhEval result. 
\"\"\" return [ data_class_type ( ** row ) for row in pheval_result ]","title":"parse_pheval_result()"},{"location":"api/pheval/analyse/parse_pheval_result/#src.pheval.analyse.parse_pheval_result.read_standardised_result","text":"Read the standardised result output and return a list of dictionaries. Parameters: Name Type Description Default standardised_result_path Path The path to the file containing the standardised result output. required Returns: Type Description List [ dict ] List[dict]: A list of dictionaries representing the content of the standardised result file. Source code in src/pheval/analyse/parse_pheval_result.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 def read_standardised_result ( standardised_result_path : Path ) -> List [ dict ]: \"\"\" Read the standardised result output and return a list of dictionaries. Args: standardised_result_path (Path): The path to the file containing the standardised result output. Returns: List[dict]: A list of dictionaries representing the content of the standardised result file. \"\"\" if standardised_result_path . is_file (): return pd . read_csv ( standardised_result_path , delimiter = \" \\t \" ) . to_dict ( \"records\" ) else : info_log . info ( f \"Could not find { standardised_result_path } \" ) return pd . DataFrame () . to_dict ( \"records\" )","title":"read_standardised_result()"},{"location":"api/pheval/analyse/prioritisation_rank_recorder/","text":"PrioritisationRankRecorder dataclass Record ranks for different types of prioritisation results. Attributes: Name Type Description index int The index representing the run. directory Path The result directory path. run_comparison defaultdict The comparison dictionary to record ranks. 
Source code in src/pheval/analyse/prioritisation_rank_recorder.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 @dataclass class PrioritisationRankRecorder : \"\"\" Record ranks for different types of prioritisation results. Attributes: index (int): The index representing the run. directory (Path): The result directory path. prioritisation_result (Union[GenePrioritisationResult, VariantPrioritisationResult, DiseasePrioritisationResult]): The prioritisation result object. run_comparison (defaultdict): The comparison dictionary to record ranks. \"\"\" index : int directory : Path prioritisation_result : Union [ GenePrioritisationResult , VariantPrioritisationResult , DiseasePrioritisationResult ] run_comparison : defaultdict def _record_gene_rank ( self ) -> None : \"\"\" Record gene prioritisation rank. This method updates the 'Gene' key in the run comparison dictionary with the gene information extracted from the correct prioritisation result. \"\"\" self . run_comparison [ self . index ][ \"Gene\" ] = self . prioritisation_result . gene def _record_variant_rank ( self ) -> None : \"\"\" Record variant prioritisation rank. This method updates the 'Variant' key in the run comparison dictionary with the variant information extracted from the correct prioritisation result. \"\"\" variant = self . prioritisation_result . variant self . run_comparison [ self . index ][ \"Variant\" ] = \"-\" . join ( [ variant . chrom , str ( variant . pos ), variant . ref , variant . alt ] ) def _record_disease_rank ( self ) -> None : \"\"\" Record disease prioritisation rank. This method updates the 'Disease' key in the run comparison dictionary with the disease information extracted from the correct prioritisation result. \"\"\" self . run_comparison [ self . index ][ \"Disease\" ] = self . prioritisation_result . 
disease . disease_identifier def record_rank ( self ) -> None : \"\"\" Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. \"\"\" self . run_comparison [ self . index ][ \"Phenopacket\" ] = self . prioritisation_result . phenopacket_path . name if type ( self . prioritisation_result ) is GenePrioritisationResult : self . _record_gene_rank () elif type ( self . prioritisation_result ) is VariantPrioritisationResult : self . _record_variant_rank () elif type ( self . prioritisation_result ) is DiseasePrioritisationResult : self . _record_disease_rank () self . run_comparison [ self . index ][ self . directory ] = self . prioritisation_result . rank record_rank () Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. Source code in src/pheval/analyse/prioritisation_rank_recorder.py 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 def record_rank ( self ) -> None : \"\"\" Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. \"\"\" self . run_comparison [ self . index ][ \"Phenopacket\" ] = self . prioritisation_result . phenopacket_path . name if type ( self . 
prioritisation_result ) is GenePrioritisationResult : self . _record_gene_rank () elif type ( self . prioritisation_result ) is VariantPrioritisationResult : self . _record_variant_rank () elif type ( self . prioritisation_result ) is DiseasePrioritisationResult : self . _record_disease_rank () self . run_comparison [ self . index ][ self . directory ] = self . prioritisation_result . rank","title":"Prioritisation rank recorder"},{"location":"api/pheval/analyse/prioritisation_rank_recorder/#src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder","text":"Record ranks for different types of prioritisation results. Attributes: Name Type Description index int The index representing the run. directory Path The result directory path. run_comparison defaultdict The comparison dictionary to record ranks. Source code in src/pheval/analyse/prioritisation_rank_recorder.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 @dataclass class PrioritisationRankRecorder : \"\"\" Record ranks for different types of prioritisation results. Attributes: index (int): The index representing the run. directory (Path): The result directory path. prioritisation_result (Union[GenePrioritisationResult, VariantPrioritisationResult, DiseasePrioritisationResult]): The prioritisation result object. run_comparison (defaultdict): The comparison dictionary to record ranks. \"\"\" index : int directory : Path prioritisation_result : Union [ GenePrioritisationResult , VariantPrioritisationResult , DiseasePrioritisationResult ] run_comparison : defaultdict def _record_gene_rank ( self ) -> None : \"\"\" Record gene prioritisation rank. This method updates the 'Gene' key in the run comparison dictionary with the gene information extracted from the correct prioritisation result. \"\"\" self . run_comparison [ self . 
index ][ \"Gene\" ] = self . prioritisation_result . gene def _record_variant_rank ( self ) -> None : \"\"\" Record variant prioritisation rank. This method updates the 'Variant' key in the run comparison dictionary with the variant information extracted from the correct prioritisation result. \"\"\" variant = self . prioritisation_result . variant self . run_comparison [ self . index ][ \"Variant\" ] = \"-\" . join ( [ variant . chrom , str ( variant . pos ), variant . ref , variant . alt ] ) def _record_disease_rank ( self ) -> None : \"\"\" Record disease prioritisation rank. This method updates the 'Disease' key in the run comparison dictionary with the disease information extracted from the correct prioritisation result. \"\"\" self . run_comparison [ self . index ][ \"Disease\" ] = self . prioritisation_result . disease . disease_identifier def record_rank ( self ) -> None : \"\"\" Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. \"\"\" self . run_comparison [ self . index ][ \"Phenopacket\" ] = self . prioritisation_result . phenopacket_path . name if type ( self . prioritisation_result ) is GenePrioritisationResult : self . _record_gene_rank () elif type ( self . prioritisation_result ) is VariantPrioritisationResult : self . _record_variant_rank () elif type ( self . prioritisation_result ) is DiseasePrioritisationResult : self . _record_disease_rank () self . run_comparison [ self . index ][ self . directory ] = self . prioritisation_result . 
rank","title":"PrioritisationRankRecorder"},{"location":"api/pheval/analyse/prioritisation_rank_recorder/#src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder.record_rank","text":"Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. Source code in src/pheval/analyse/prioritisation_rank_recorder.py 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 def record_rank ( self ) -> None : \"\"\" Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. \"\"\" self . run_comparison [ self . index ][ \"Phenopacket\" ] = self . prioritisation_result . phenopacket_path . name if type ( self . prioritisation_result ) is GenePrioritisationResult : self . _record_gene_rank () elif type ( self . prioritisation_result ) is VariantPrioritisationResult : self . _record_variant_rank () elif type ( self . prioritisation_result ) is DiseasePrioritisationResult : self . _record_disease_rank () self . run_comparison [ self . index ][ self . directory ] = self . prioritisation_result . rank","title":"record_rank()"},{"location":"api/pheval/analyse/prioritisation_result_types/","text":"DiseasePrioritisationResult dataclass Store rank data for known diseases. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. disease ProbandDisease The proband disease. rank int The assigned rank for the disease. Defaults to 0. 
Source code in src/pheval/analyse/prioritisation_result_types.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 @dataclass class DiseasePrioritisationResult : \"\"\" Store rank data for known diseases. Attributes: phenopacket_path (Path): Path to the phenopacket. disease (ProbandDisease): The proband disease. rank (int): The assigned rank for the disease. Defaults to 0. \"\"\" phenopacket_path : Path disease : ProbandDisease rank : int = 0 GenePrioritisationResult dataclass Store rank data for causative genes. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. gene str The causative gene. rank int The assigned rank for the gene. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 7 8 9 10 11 12 13 14 15 16 17 18 19 20 @dataclass class GenePrioritisationResult : \"\"\" Store rank data for causative genes. Attributes: phenopacket_path (Path): Path to the phenopacket. gene (str): The causative gene. rank (int): The assigned rank for the gene. Defaults to 0. \"\"\" phenopacket_path : Path gene : str rank : int = 0 VariantPrioritisationResult dataclass Store rank data for variants. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. variant GenomicVariant The genomic variant. rank int The assigned rank for the variant. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 23 24 25 26 27 28 29 30 31 32 33 34 35 36 @dataclass class VariantPrioritisationResult : \"\"\" Store rank data for variants. Attributes: phenopacket_path (Path): Path to the phenopacket. variant (GenomicVariant): The genomic variant. rank (int): The assigned rank for the variant. Defaults to 0. \"\"\" phenopacket_path : Path variant : GenomicVariant rank : int = 0","title":"Prioritisation result types"},{"location":"api/pheval/analyse/prioritisation_result_types/#src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResult","text":"Store rank data for known diseases. 
Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. disease ProbandDisease The proband disease. rank int The assigned rank for the disease. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 @dataclass class DiseasePrioritisationResult : \"\"\" Store rank data for known diseases. Attributes: phenopacket_path (Path): Path to the phenopacket. disease (ProbandDisease): The proband disease. rank (int): The assigned rank for the disease. Defaults to 0. \"\"\" phenopacket_path : Path disease : ProbandDisease rank : int = 0","title":"DiseasePrioritisationResult"},{"location":"api/pheval/analyse/prioritisation_result_types/#src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult","text":"Store rank data for causative genes. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. gene str The causative gene. rank int The assigned rank for the gene. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 7 8 9 10 11 12 13 14 15 16 17 18 19 20 @dataclass class GenePrioritisationResult : \"\"\" Store rank data for causative genes. Attributes: phenopacket_path (Path): Path to the phenopacket. gene (str): The causative gene. rank (int): The assigned rank for the gene. Defaults to 0. \"\"\" phenopacket_path : Path gene : str rank : int = 0","title":"GenePrioritisationResult"},{"location":"api/pheval/analyse/prioritisation_result_types/#src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResult","text":"Store rank data for variants. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. variant GenomicVariant The genomic variant. rank int The assigned rank for the variant. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 23 24 25 26 27 28 29 30 31 32 33 34 35 36 @dataclass class VariantPrioritisationResult : \"\"\" Store rank data for variants. 
Attributes: phenopacket_path (Path): Path to the phenopacket. variant (GenomicVariant): The genomic variant. rank (int): The assigned rank for the variant. Defaults to 0. \"\"\" phenopacket_path : Path variant : GenomicVariant rank : int = 0","title":"VariantPrioritisationResult"},{"location":"api/pheval/analyse/rank_stats/","text":"RankStats dataclass Store statistics related to ranking. Attributes: Name Type Description top int Count of top-ranked matches. top3 int Count of matches within the top 3 ranks. top5 int Count of matches within the top 5 ranks. top10 int Count of matches within the top 10 ranks. found int Count of found matches. total int Total count of matches. reciprocal_ranks List [ float ] List of reciprocal ranks. relevant_ranks List [ List [ int ]] Nested list of ranks for the known entities for all cases in a run. mrr float Mean Reciprocal Rank (MRR). Defaults to None. Source code in src/pheval/analyse/rank_stats.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 @dataclass class RankStats : \"\"\"Store statistics related to ranking. 
Attributes: top (int): Count of top-ranked matches. top3 (int): Count of matches within the top 3 ranks. top5 (int): Count of matches within the top 5 ranks. top10 (int): Count of matches within the top 10 ranks. found (int): Count of found matches. total (int): Total count of matches. reciprocal_ranks (List[float]): List of reciprocal ranks. relevant_ranks List[List[int]]: Nested list of ranks for the known entities for all cases in a run. mrr (float): Mean Reciprocal Rank (MRR). Defaults to None. \"\"\" top : int = 0 top3 : int = 0 top5 : int = 0 top10 : int = 0 found : int = 0 total : int = 0 reciprocal_ranks : List = field ( default_factory = list ) relevant_result_ranks : List [ List [ int ]] = field ( default_factory = list ) mrr : float = None def add_rank ( self , rank : int ) -> None : \"\"\" Add rank for matched result. Args: rank (int): The rank value to be added. Notes: This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. \"\"\" self . reciprocal_ranks . append ( 1 / rank ) self . found += 1 if rank == 1 : self . top += 1 if rank != \"\" and rank <= 3 : self . top3 += 1 if rank != \"\" and rank <= 5 : self . top5 += 1 if rank != \"\" and rank <= 10 : self . top10 += 1 def percentage_rank ( self , value : int ) -> float : \"\"\" Calculate the percentage rank. Args: value (int): The value for which the percentage rank needs to be calculated. Returns: float: The calculated percentage rank based on the provided value and the total count. \"\"\" return 100 * value / self . total def percentage_top ( self ) -> float : \"\"\" Calculate the percentage of top matches. Returns: float: The percentage of top matches compared to the total count. \"\"\" return self . percentage_rank ( self . 
top ) def percentage_top3 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 3. Returns: float: The percentage of matches within the top 3 compared to the total count. \"\"\" return self . percentage_rank ( self . top3 ) def percentage_top5 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 5. Returns: float: The percentage of matches within the top 5 compared to the total count. \"\"\" return self . percentage_rank ( self . top5 ) def percentage_top10 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 10. Returns: float: The percentage of matches within the top 10 compared to the total count. \"\"\" return self . percentage_rank ( self . top10 ) def percentage_found ( self ) -> float : \"\"\" Calculate the percentage of matches found. Returns: float: The percentage of matches found compared to the total count. \"\"\" return self . percentage_rank ( self . found ) @staticmethod def percentage_difference ( percentage_value_1 : float , percentage_value_2 : float ) -> float : \"\"\" Calculate the percentage difference between two percentage values. Args: percentage_value_1 (float): The first percentage value. percentage_value_2 (float): The second percentage value. Returns: float: The difference between the two percentage values. \"\"\" return percentage_value_1 - percentage_value_2 def mean_reciprocal_rank ( self ) -> float : \"\"\" Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: float: The calculated Mean Reciprocal Rank. \"\"\" if len ( self . reciprocal_ranks ) != self . total : missing_cases = self . total - self . found self . reciprocal_ranks . extend ([ 0 ] * missing_cases ) return mean ( self . 
reciprocal_ranks ) return mean ( self . reciprocal_ranks ) def return_mean_reciprocal_rank ( self ) -> float : \"\"\" Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: float: The Mean Reciprocal Rank value. \"\"\" if self . mrr is not None : return self . mrr else : return self . mean_reciprocal_rank () def precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Args: k (int): The number of top predictions to consider. Returns: float: The precision at k, ranging from 0.0 to 1.0. A higher precision indicates a better performance in identifying relevant items in the top-k predictions. \"\"\" k_attr = getattr ( self , f \"top { k } \" ) if k > 1 else self . top return k_attr / ( self . total * k ) @staticmethod def _average_precision_at_k ( number_of_relevant_entities_at_k : int , precision_at_k : float ) -> float : \"\"\" Calculate the Average Precision at k. Average Precision at k (AP@k) is a metric used to evaluate the precision of a ranked retrieval system. It measures the precision at each relevant position up to k and takes the average. Args: number_of_relevant_entities_at_k (int): The count of relevant entities in the top-k predictions. precision_at_k (float): The precision at k - the sum of the precision values at each relevant position. Returns: float: The Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better precision in the top-k predictions. 
\"\"\" return ( ( 1 / number_of_relevant_entities_at_k ) * precision_at_k if number_of_relevant_entities_at_k > 0 else 0.0 ) def mean_average_precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Args: k (int): The number of top predictions to consider for precision calculation. Returns: float: The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. \"\"\" cumulative_average_precision_scores = 0 for result_ranks in self . relevant_result_ranks : precision_at_k , number_of_relevant_entities_at_k = 0 , 0 for rank in result_ranks : if 0 < rank <= k : number_of_relevant_entities_at_k += 1 precision_at_k += number_of_relevant_entities_at_k / rank cumulative_average_precision_scores += self . _average_precision_at_k ( number_of_relevant_entities_at_k , precision_at_k ) return ( 1 / self . total ) * cumulative_average_precision_scores def f_beta_score_at_k ( self , percentage_at_k : float , k : int ) -> float : \"\"\" Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Args: percentage_at_k (float): The percentage of true positive predictions within the top-k. k (int): The number of top predictions to consider. Returns: float: The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. \"\"\" precision = self . 
precision_at_k ( k ) recall_at_k = percentage_at_k / 100 return ( ( 2 * precision * recall_at_k ) / ( precision + recall_at_k ) if ( precision + recall_at_k ) > 0 else 0 ) def mean_normalised_discounted_cumulative_gain ( self , k : int ) -> float : \"\"\" Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Args: k (int): The rank cutoff for calculating NDCG. Returns: float: The mean NDCG score across all query results. \"\"\" ndcg_scores = [] for result_ranks in self . relevant_result_ranks : result_ranks = [ rank for rank in result_ranks if rank <= k ] result_ranks = [ 3 if i in result_ranks else 0 for i in range ( k )] ideal_ranking = sorted ( result_ranks , reverse = True ) ndcg_scores . append ( ndcg_score ( np . asarray ([ ideal_ranking ]), np . asarray ([ result_ranks ]))) return np . mean ( ndcg_scores ) add_rank ( rank ) Add rank for matched result. Parameters: Name Type Description Default rank int The rank value to be added. required Notes This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. Source code in src/pheval/analyse/rank_stats.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 def add_rank ( self , rank : int ) -> None : \"\"\" Add rank for matched result. Args: rank (int): The rank value to be added. Notes: This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. 
\"\"\" self . reciprocal_ranks . append ( 1 / rank ) self . found += 1 if rank == 1 : self . top += 1 if rank != \"\" and rank <= 3 : self . top3 += 1 if rank != \"\" and rank <= 5 : self . top5 += 1 if rank != \"\" and rank <= 10 : self . top10 += 1 f_beta_score_at_k ( percentage_at_k , k ) Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Parameters: Name Type Description Default percentage_at_k float The percentage of true positive predictions within the top-k. required k int The number of top predictions to consider. required Returns: Name Type Description float float The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. Source code in src/pheval/analyse/rank_stats.py 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 def f_beta_score_at_k ( self , percentage_at_k : float , k : int ) -> float : \"\"\" Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Args: percentage_at_k (float): The percentage of true positive predictions within the top-k. k (int): The number of top predictions to consider. Returns: float: The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. \"\"\" precision = self . 
precision_at_k ( k ) recall_at_k = percentage_at_k / 100 return ( ( 2 * precision * recall_at_k ) / ( precision + recall_at_k ) if ( precision + recall_at_k ) > 0 else 0 ) mean_average_precision_at_k ( k ) Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Parameters: Name Type Description Default k int The number of top predictions to consider for precision calculation. required Returns: Name Type Description float float The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. Source code in src/pheval/analyse/rank_stats.py 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 def mean_average_precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Args: k (int): The number of top predictions to consider for precision calculation. Returns: float: The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. \"\"\" cumulative_average_precision_scores = 0 for result_ranks in self . relevant_result_ranks : precision_at_k , number_of_relevant_entities_at_k = 0 , 0 for rank in result_ranks : if 0 < rank <= k : number_of_relevant_entities_at_k += 1 precision_at_k += number_of_relevant_entities_at_k / rank cumulative_average_precision_scores += self . _average_precision_at_k ( number_of_relevant_entities_at_k , precision_at_k ) return ( 1 / self . 
total ) * cumulative_average_precision_scores mean_normalised_discounted_cumulative_gain ( k ) Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Parameters: Name Type Description Default k int The rank cutoff for calculating NDCG. required Returns: Name Type Description float float The mean NDCG score across all query results. Source code in src/pheval/analyse/rank_stats.py 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 def mean_normalised_discounted_cumulative_gain ( self , k : int ) -> float : \"\"\" Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Args: k (int): The rank cutoff for calculating NDCG. Returns: float: The mean NDCG score across all query results. \"\"\" ndcg_scores = [] for result_ranks in self . relevant_result_ranks : result_ranks = [ rank for rank in result_ranks if rank <= k ] result_ranks = [ 3 if i in result_ranks else 0 for i in range ( k )] ideal_ranking = sorted ( result_ranks , reverse = True ) ndcg_scores . append ( ndcg_score ( np . asarray ([ ideal_ranking ]), np . asarray ([ result_ranks ]))) return np . mean ( ndcg_scores ) mean_reciprocal_rank () Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: Name Type Description float float The calculated Mean Reciprocal Rank. 
Source code in src/pheval/analyse/rank_stats.py 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 def mean_reciprocal_rank ( self ) -> float : \"\"\" Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: float: The calculated Mean Reciprocal Rank. \"\"\" if len ( self . reciprocal_ranks ) != self . total : missing_cases = self . total - self . found self . reciprocal_ranks . extend ([ 0 ] * missing_cases ) return mean ( self . reciprocal_ranks ) return mean ( self . reciprocal_ranks ) percentage_difference ( percentage_value_1 , percentage_value_2 ) staticmethod Calculate the percentage difference between two percentage values. Parameters: Name Type Description Default percentage_value_1 float The first percentage value. required percentage_value_2 float The second percentage value. required Returns: Name Type Description float float The difference between the two percentage values. Source code in src/pheval/analyse/rank_stats.py 120 121 122 123 124 125 126 127 128 129 130 131 132 @staticmethod def percentage_difference ( percentage_value_1 : float , percentage_value_2 : float ) -> float : \"\"\" Calculate the percentage difference between two percentage values. Args: percentage_value_1 (float): The first percentage value. percentage_value_2 (float): The second percentage value. Returns: float: The difference between the two percentage values. \"\"\" return percentage_value_1 - percentage_value_2 percentage_found () Calculate the percentage of matches found. Returns: Name Type Description float float The percentage of matches found compared to the total count. 
Source code in src/pheval/analyse/rank_stats.py 111 112 113 114 115 116 117 118 def percentage_found ( self ) -> float : \"\"\" Calculate the percentage of matches found. Returns: float: The percentage of matches found compared to the total count. \"\"\" return self . percentage_rank ( self . found ) percentage_rank ( value ) Calculate the percentage rank. Parameters: Name Type Description Default value int The value for which the percentage rank needs to be calculated. required Returns: Name Type Description float float The calculated percentage rank based on the provided value and the total count. Source code in src/pheval/analyse/rank_stats.py 63 64 65 66 67 68 69 70 71 72 73 def percentage_rank ( self , value : int ) -> float : \"\"\" Calculate the percentage rank. Args: value (int): The value for which the percentage rank needs to be calculated. Returns: float: The calculated percentage rank based on the provided value and the total count. \"\"\" return 100 * value / self . total percentage_top () Calculate the percentage of top matches. Returns: Name Type Description float float The percentage of top matches compared to the total count. Source code in src/pheval/analyse/rank_stats.py 75 76 77 78 79 80 81 82 def percentage_top ( self ) -> float : \"\"\" Calculate the percentage of top matches. Returns: float: The percentage of top matches compared to the total count. \"\"\" return self . percentage_rank ( self . top ) percentage_top10 () Calculate the percentage of matches within the top 10. Returns: Name Type Description float float The percentage of matches within the top 10 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 102 103 104 105 106 107 108 109 def percentage_top10 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 10. Returns: float: The percentage of matches within the top 10 compared to the total count. \"\"\" return self . percentage_rank ( self . 
top10 ) percentage_top3 () Calculate the percentage of matches within the top 3. Returns: Name Type Description float float The percentage of matches within the top 3 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 84 85 86 87 88 89 90 91 def percentage_top3 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 3. Returns: float: The percentage of matches within the top 3 compared to the total count. \"\"\" return self . percentage_rank ( self . top3 ) percentage_top5 () Calculate the percentage of matches within the top 5. Returns: Name Type Description float float The percentage of matches within the top 5 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 93 94 95 96 97 98 99 100 def percentage_top5 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 5. Returns: float: The percentage of matches within the top 5 compared to the total count. \"\"\" return self . percentage_rank ( self . top5 ) precision_at_k ( k ) Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Parameters: Name Type Description Default k int The number of top predictions to consider. required Returns: Name Type Description float float The precision at k, ranging from 0.0 to 1.0. float A higher precision indicates a better performance in identifying relevant items in the top-k predictions. Source code in src/pheval/analyse/rank_stats.py 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 def precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Args: k (int): The number of top predictions to consider. 
Returns: float: The precision at k, ranging from 0.0 to 1.0. A higher precision indicates a better performance in identifying relevant items in the top-k predictions. \"\"\" k_attr = getattr ( self , f \"top { k } \" ) if k > 1 else self . top return k_attr / ( self . total * k ) return_mean_reciprocal_rank () Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: Name Type Description float float The Mean Reciprocal Rank value. Source code in src/pheval/analyse/rank_stats.py 153 154 155 156 157 158 159 160 161 162 163 164 165 166 def return_mean_reciprocal_rank ( self ) -> float : \"\"\" Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: float: The Mean Reciprocal Rank value. \"\"\" if self . mrr is not None : return self . mrr else : return self . mean_reciprocal_rank () RankStatsWriter Class for writing the rank stats to a file. 
Source code in src/pheval/analyse/rank_stats.py 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 class RankStatsWriter : \"\"\"Class for writing the rank stats to a file.\"\"\" def __init__ ( self , file : Path ): \"\"\" Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written \"\"\" self . file = open ( file , \"w\" ) self . writer = csv . writer ( self . file , delimiter = \" \\t \" ) self . writer . writerow ( [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , \"percentage_top\" , \"percentage_top3\" , \"percentage_top5\" , \"percentage_top10\" , \"percentage_found\" , \"precision@1\" , \"precision@3\" , \"precision@5\" , \"precision@10\" , \"MAP@1\" , \"MAP@3\" , \"MAP@5\" , \"MAP@10\" , \"f_beta_score@1\" , \"f_beta_score@3\" , \"f_beta_score@5\" , \"f_beta_score@10\" , \"NDCG@3\" , \"NDCG@5\" , \"NDCG@10\" , \"true_positives\" , \"false_positives\" , \"true_negatives\" , \"false_negatives\" , \"sensitivity\" , \"specificity\" , \"precision\" , \"negative_predictive_value\" , \"false_positive_rate\" , \"false_discovery_rate\" , \"false_negative_rate\" , \"accuracy\" , \"f1_score\" , \"matthews_correlation_coefficient\" , ] ) def write_row ( self , directory : Path , rank_stats : RankStats , binary_classification : BinaryClassificationStats , ) -> None : \"\"\" Write summary rank statistics row for a run to the file. 
Args: directory (Path): Path to the results directory corresponding to the run rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run Raises: IOError: If there is an error writing to the file. \"\"\" try : self . writer . writerow ( [ directory , rank_stats . top , rank_stats . top3 , rank_stats . top5 , rank_stats . top10 , rank_stats . found , rank_stats . total , rank_stats . mean_reciprocal_rank (), rank_stats . percentage_top (), rank_stats . percentage_top3 (), rank_stats . percentage_top5 (), rank_stats . percentage_top10 (), rank_stats . percentage_found (), rank_stats . precision_at_k ( 1 ), rank_stats . precision_at_k ( 3 ), rank_stats . precision_at_k ( 5 ), rank_stats . precision_at_k ( 10 ), rank_stats . mean_average_precision_at_k ( 1 ), rank_stats . mean_average_precision_at_k ( 3 ), rank_stats . mean_average_precision_at_k ( 5 ), rank_stats . mean_average_precision_at_k ( 10 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top (), 1 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top3 (), 3 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top5 (), 5 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top10 (), 10 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 3 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 5 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 10 ), binary_classification . true_positives , binary_classification . false_positives , binary_classification . true_negatives , binary_classification . false_negatives , binary_classification . sensitivity (), binary_classification . specificity (), binary_classification . precision (), binary_classification . negative_predictive_value (), binary_classification . false_positive_rate (), binary_classification . false_discovery_rate (), binary_classification . false_negative_rate (), binary_classification . accuracy (), binary_classification . 
f1_score (), binary_classification . matthews_correlation_coefficient (), ] ) except IOError : print ( \"Error writing \" , self . file ) def close ( self ) -> None : \"\"\" Close the file used for writing rank statistics. Raises: IOError: If there's an error while closing the file. \"\"\" try : self . file . close () except IOError : print ( \"Error closing \" , self . file ) __init__ ( file ) Initialise the RankStatsWriter class Parameters: Name Type Description Default file Path Path to the file where rank stats will be written required Source code in src/pheval/analyse/rank_stats.py 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 def __init__ ( self , file : Path ): \"\"\" Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written \"\"\" self . file = open ( file , \"w\" ) self . writer = csv . writer ( self . file , delimiter = \" \\t \" ) self . writer . writerow ( [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , \"percentage_top\" , \"percentage_top3\" , \"percentage_top5\" , \"percentage_top10\" , \"percentage_found\" , \"precision@1\" , \"precision@3\" , \"precision@5\" , \"precision@10\" , \"MAP@1\" , \"MAP@3\" , \"MAP@5\" , \"MAP@10\" , \"f_beta_score@1\" , \"f_beta_score@3\" , \"f_beta_score@5\" , \"f_beta_score@10\" , \"NDCG@3\" , \"NDCG@5\" , \"NDCG@10\" , \"true_positives\" , \"false_positives\" , \"true_negatives\" , \"false_negatives\" , \"sensitivity\" , \"specificity\" , \"precision\" , \"negative_predictive_value\" , \"false_positive_rate\" , \"false_discovery_rate\" , \"false_negative_rate\" , \"accuracy\" , \"f1_score\" , \"matthews_correlation_coefficient\" , ] ) close () Close the file used for writing rank statistics. 
Raises: Type Description IOError If there's an error while closing the file. Source code in src/pheval/analyse/rank_stats.py 404 405 406 407 408 409 410 411 412 413 414 def close ( self ) -> None : \"\"\" Close the file used for writing rank statistics. Raises: IOError: If there's an error while closing the file. \"\"\" try : self . file . close () except IOError : print ( \"Error closing \" , self . file ) write_row ( directory , rank_stats , binary_classification ) Write summary rank statistics row for a run to the file. Parameters: Name Type Description Default directory Path Path to the results directory corresponding to the run required rank_stats RankStats RankStats instance containing rank statistics corresponding to the run required Raises: Type Description IOError If there is an error writing to the file. Source code in src/pheval/analyse/rank_stats.py 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 def write_row ( self , directory : Path , rank_stats : RankStats , binary_classification : BinaryClassificationStats , ) -> None : \"\"\" Write summary rank statistics row for a run to the file. Args: directory (Path): Path to the results directory corresponding to the run rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run Raises: IOError: If there is an error writing to the file. \"\"\" try : self . writer . writerow ( [ directory , rank_stats . top , rank_stats . top3 , rank_stats . top5 , rank_stats . top10 , rank_stats . found , rank_stats . total , rank_stats . mean_reciprocal_rank (), rank_stats . percentage_top (), rank_stats . percentage_top3 (), rank_stats . percentage_top5 (), rank_stats . percentage_top10 (), rank_stats . percentage_found (), rank_stats . precision_at_k ( 1 ), rank_stats . 
precision_at_k ( 3 ), rank_stats . precision_at_k ( 5 ), rank_stats . precision_at_k ( 10 ), rank_stats . mean_average_precision_at_k ( 1 ), rank_stats . mean_average_precision_at_k ( 3 ), rank_stats . mean_average_precision_at_k ( 5 ), rank_stats . mean_average_precision_at_k ( 10 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top (), 1 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top3 (), 3 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top5 (), 5 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top10 (), 10 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 3 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 5 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 10 ), binary_classification . true_positives , binary_classification . false_positives , binary_classification . true_negatives , binary_classification . false_negatives , binary_classification . sensitivity (), binary_classification . specificity (), binary_classification . precision (), binary_classification . negative_predictive_value (), binary_classification . false_positive_rate (), binary_classification . false_discovery_rate (), binary_classification . false_negative_rate (), binary_classification . accuracy (), binary_classification . f1_score (), binary_classification . matthews_correlation_coefficient (), ] ) except IOError : print ( \"Error writing \" , self . file )","title":"Rank stats"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats","text":"Store statistics related to ranking. Attributes: Name Type Description top int Count of top-ranked matches. top3 int Count of matches within the top 3 ranks. top5 int Count of matches within the top 5 ranks. top10 int Count of matches within the top 10 ranks. found int Count of found matches. total int Total count of matches. reciprocal_ranks List [ float ] List of reciprocal ranks. 
relevant_ranks List [ List [ int ]] Nested list of ranks for the known entities for all cases in a run. mrr float Mean Reciprocal Rank (MRR). Defaults to None. Source code in src/pheval/analyse/rank_stats.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 @dataclass class RankStats : \"\"\"Store statistics related to ranking. Attributes: top (int): Count of top-ranked matches. top3 (int): Count of matches within the top 3 ranks. top5 (int): Count of matches within the top 5 ranks. top10 (int): Count of matches within the top 10 ranks. found (int): Count of found matches. total (int): Total count of matches. reciprocal_ranks (List[float]): List of reciprocal ranks. relevant_ranks List[List[int]]: Nested list of ranks for the known entities for all cases in a run. mrr (float): Mean Reciprocal Rank (MRR). Defaults to None. 
\"\"\" top : int = 0 top3 : int = 0 top5 : int = 0 top10 : int = 0 found : int = 0 total : int = 0 reciprocal_ranks : List = field ( default_factory = list ) relevant_result_ranks : List [ List [ int ]] = field ( default_factory = list ) mrr : float = None def add_rank ( self , rank : int ) -> None : \"\"\" Add rank for matched result. Args: rank (int): The rank value to be added. Notes: This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. \"\"\" self . reciprocal_ranks . append ( 1 / rank ) self . found += 1 if rank == 1 : self . top += 1 if rank != \"\" and rank <= 3 : self . top3 += 1 if rank != \"\" and rank <= 5 : self . top5 += 1 if rank != \"\" and rank <= 10 : self . top10 += 1 def percentage_rank ( self , value : int ) -> float : \"\"\" Calculate the percentage rank. Args: value (int): The value for which the percentage rank needs to be calculated. Returns: float: The calculated percentage rank based on the provided value and the total count. \"\"\" return 100 * value / self . total def percentage_top ( self ) -> float : \"\"\" Calculate the percentage of top matches. Returns: float: The percentage of top matches compared to the total count. \"\"\" return self . percentage_rank ( self . top ) def percentage_top3 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 3. Returns: float: The percentage of matches within the top 3 compared to the total count. \"\"\" return self . percentage_rank ( self . top3 ) def percentage_top5 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 5. Returns: float: The percentage of matches within the top 5 compared to the total count. \"\"\" return self . percentage_rank ( self . 
top5 ) def percentage_top10 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 10. Returns: float: The percentage of matches within the top 10 compared to the total count. \"\"\" return self . percentage_rank ( self . top10 ) def percentage_found ( self ) -> float : \"\"\" Calculate the percentage of matches found. Returns: float: The percentage of matches found compared to the total count. \"\"\" return self . percentage_rank ( self . found ) @staticmethod def percentage_difference ( percentage_value_1 : float , percentage_value_2 : float ) -> float : \"\"\" Calculate the percentage difference between two percentage values. Args: percentage_value_1 (float): The first percentage value. percentage_value_2 (float): The second percentage value. Returns: float: The difference between the two percentage values. \"\"\" return percentage_value_1 - percentage_value_2 def mean_reciprocal_rank ( self ) -> float : \"\"\" Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: float: The calculated Mean Reciprocal Rank. \"\"\" if len ( self . reciprocal_ranks ) != self . total : missing_cases = self . total - self . found self . reciprocal_ranks . extend ([ 0 ] * missing_cases ) return mean ( self . reciprocal_ranks ) return mean ( self . reciprocal_ranks ) def return_mean_reciprocal_rank ( self ) -> float : \"\"\" Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: float: The Mean Reciprocal Rank value. \"\"\" if self . mrr is not None : return self . mrr else : return self . 
mean_reciprocal_rank () def precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Args: k (int): The number of top predictions to consider. Returns: float: The precision at k, ranging from 0.0 to 1.0. A higher precision indicates a better performance in identifying relevant items in the top-k predictions. \"\"\" k_attr = getattr ( self , f \"top { k } \" ) if k > 1 else self . top return k_attr / ( self . total * k ) @staticmethod def _average_precision_at_k ( number_of_relevant_entities_at_k : int , precision_at_k : float ) -> float : \"\"\" Calculate the Average Precision at k. Average Precision at k (AP@k) is a metric used to evaluate the precision of a ranked retrieval system. It measures the precision at each relevant position up to k and takes the average. Args: number_of_relevant_entities_at_k (int): The count of relevant entities in the top-k predictions. precision_at_k (float): The precision at k - the sum of the precision values at each relevant position. Returns: float: The Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better precision in the top-k predictions. \"\"\" return ( ( 1 / number_of_relevant_entities_at_k ) * precision_at_k if number_of_relevant_entities_at_k > 0 else 0.0 ) def mean_average_precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Args: k (int): The number of top predictions to consider for precision calculation. Returns: float: The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. 
\"\"\" cumulative_average_precision_scores = 0 for result_ranks in self . relevant_result_ranks : precision_at_k , number_of_relevant_entities_at_k = 0 , 0 for rank in result_ranks : if 0 < rank <= k : number_of_relevant_entities_at_k += 1 precision_at_k += number_of_relevant_entities_at_k / rank cumulative_average_precision_scores += self . _average_precision_at_k ( number_of_relevant_entities_at_k , precision_at_k ) return ( 1 / self . total ) * cumulative_average_precision_scores def f_beta_score_at_k ( self , percentage_at_k : float , k : int ) -> float : \"\"\" Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Args: percentage_at_k (float): The percentage of true positive predictions within the top-k. k (int): The number of top predictions to consider. Returns: float: The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. \"\"\" precision = self . precision_at_k ( k ) recall_at_k = percentage_at_k / 100 return ( ( 2 * precision * recall_at_k ) / ( precision + recall_at_k ) if ( precision + recall_at_k ) > 0 else 0 ) def mean_normalised_discounted_cumulative_gain ( self , k : int ) -> float : \"\"\" Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Args: k (int): The rank cutoff for calculating NDCG. Returns: float: The mean NDCG score across all query results. \"\"\" ndcg_scores = [] for result_ranks in self . 
relevant_result_ranks : result_ranks = [ rank for rank in result_ranks if rank <= k ] result_ranks = [ 3 if i in result_ranks else 0 for i in range ( k )] ideal_ranking = sorted ( result_ranks , reverse = True ) ndcg_scores . append ( ndcg_score ( np . asarray ([ ideal_ranking ]), np . asarray ([ result_ranks ]))) return np . mean ( ndcg_scores )","title":"RankStats"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.add_rank","text":"Add rank for matched result. Parameters: Name Type Description Default rank int The rank value to be added. required Notes This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. Source code in src/pheval/analyse/rank_stats.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 def add_rank ( self , rank : int ) -> None : \"\"\" Add rank for matched result. Args: rank (int): The rank value to be added. Notes: This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. \"\"\" self . reciprocal_ranks . append ( 1 / rank ) self . found += 1 if rank == 1 : self . top += 1 if rank != \"\" and rank <= 3 : self . top3 += 1 if rank != \"\" and rank <= 5 : self . top5 += 1 if rank != \"\" and rank <= 10 : self . top10 += 1","title":"add_rank()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k","text":"Calculate the F-beta score at k. 
The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Parameters: Name Type Description Default percentage_at_k float The percentage of true positive predictions within the top-k. required k int The number of top predictions to consider. required Returns: Name Type Description float float The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. Source code in src/pheval/analyse/rank_stats.py 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 def f_beta_score_at_k ( self , percentage_at_k : float , k : int ) -> float : \"\"\" Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Args: percentage_at_k (float): The percentage of true positive predictions within the top-k. k (int): The number of top predictions to consider. Returns: float: The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. \"\"\" precision = self . precision_at_k ( k ) recall_at_k = percentage_at_k / 100 return ( ( 2 * precision * recall_at_k ) / ( precision + recall_at_k ) if ( precision + recall_at_k ) > 0 else 0 )","title":"f_beta_score_at_k()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k","text":"Calculate the Mean Average Precision at k. 
Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Parameters: Name Type Description Default k int The number of top predictions to consider for precision calculation. required Returns: Name Type Description float float The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. Source code in src/pheval/analyse/rank_stats.py 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 def mean_average_precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Args: k (int): The number of top predictions to consider for precision calculation. Returns: float: The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. \"\"\" cumulative_average_precision_scores = 0 for result_ranks in self . relevant_result_ranks : precision_at_k , number_of_relevant_entities_at_k = 0 , 0 for rank in result_ranks : if 0 < rank <= k : number_of_relevant_entities_at_k += 1 precision_at_k += number_of_relevant_entities_at_k / rank cumulative_average_precision_scores += self . _average_precision_at_k ( number_of_relevant_entities_at_k , precision_at_k ) return ( 1 / self . total ) * cumulative_average_precision_scores","title":"mean_average_precision_at_k()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumulative_gain","text":"Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. 
NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Parameters: Name Type Description Default k int The rank cutoff for calculating NDCG. required Returns: Name Type Description float float The mean NDCG score across all query results. Source code in src/pheval/analyse/rank_stats.py 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 def mean_normalised_discounted_cumulative_gain ( self , k : int ) -> float : \"\"\" Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Args: k (int): The rank cutoff for calculating NDCG. Returns: float: The mean NDCG score across all query results. \"\"\" ndcg_scores = [] for result_ranks in self . relevant_result_ranks : result_ranks = [ rank for rank in result_ranks if rank <= k ] result_ranks = [ 3 if i in result_ranks else 0 for i in range ( k )] ideal_ranking = sorted ( result_ranks , reverse = True ) ndcg_scores . append ( ndcg_score ( np . asarray ([ ideal_ranking ]), np . asarray ([ result_ranks ]))) return np . mean ( ndcg_scores )","title":"mean_normalised_discounted_cumulative_gain()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.mean_reciprocal_rank","text":"Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: Name Type Description float float The calculated Mean Reciprocal Rank. Source code in src/pheval/analyse/rank_stats.py 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 def mean_reciprocal_rank ( self ) -> float : \"\"\" Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. 
The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: float: The calculated Mean Reciprocal Rank. \"\"\" if len ( self . reciprocal_ranks ) != self . total : missing_cases = self . total - self . found self . reciprocal_ranks . extend ([ 0 ] * missing_cases ) return mean ( self . reciprocal_ranks ) return mean ( self . reciprocal_ranks )","title":"mean_reciprocal_rank()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_difference","text":"Calculate the percentage difference between two percentage values. Parameters: Name Type Description Default percentage_value_1 float The first percentage value. required percentage_value_2 float The second percentage value. required Returns: Name Type Description float float The difference between the two percentage values. Source code in src/pheval/analyse/rank_stats.py 120 121 122 123 124 125 126 127 128 129 130 131 132 @staticmethod def percentage_difference ( percentage_value_1 : float , percentage_value_2 : float ) -> float : \"\"\" Calculate the percentage difference between two percentage values. Args: percentage_value_1 (float): The first percentage value. percentage_value_2 (float): The second percentage value. Returns: float: The difference between the two percentage values. \"\"\" return percentage_value_1 - percentage_value_2","title":"percentage_difference()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_found","text":"Calculate the percentage of matches found. Returns: Name Type Description float float The percentage of matches found compared to the total count. Source code in src/pheval/analyse/rank_stats.py 111 112 113 114 115 116 117 118 def percentage_found ( self ) -> float : \"\"\" Calculate the percentage of matches found. 
Returns: float: The percentage of matches found compared to the total count. \"\"\" return self . percentage_rank ( self . found )","title":"percentage_found()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_rank","text":"Calculate the percentage rank. Parameters: Name Type Description Default value int The value for which the percentage rank needs to be calculated. required Returns: Name Type Description float float The calculated percentage rank based on the provided value and the total count. Source code in src/pheval/analyse/rank_stats.py 63 64 65 66 67 68 69 70 71 72 73 def percentage_rank ( self , value : int ) -> float : \"\"\" Calculate the percentage rank. Args: value (int): The value for which the percentage rank needs to be calculated. Returns: float: The calculated percentage rank based on the provided value and the total count. \"\"\" return 100 * value / self . total","title":"percentage_rank()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_top","text":"Calculate the percentage of top matches. Returns: Name Type Description float float The percentage of top matches compared to the total count. Source code in src/pheval/analyse/rank_stats.py 75 76 77 78 79 80 81 82 def percentage_top ( self ) -> float : \"\"\" Calculate the percentage of top matches. Returns: float: The percentage of top matches compared to the total count. \"\"\" return self . percentage_rank ( self . top )","title":"percentage_top()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_top10","text":"Calculate the percentage of matches within the top 10. Returns: Name Type Description float float The percentage of matches within the top 10 compared to the total count. 
Source code in src/pheval/analyse/rank_stats.py 102 103 104 105 106 107 108 109 def percentage_top10 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 10. Returns: float: The percentage of matches within the top 10 compared to the total count. \"\"\" return self . percentage_rank ( self . top10 )","title":"percentage_top10()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_top3","text":"Calculate the percentage of matches within the top 3. Returns: Name Type Description float float The percentage of matches within the top 3 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 84 85 86 87 88 89 90 91 def percentage_top3 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 3. Returns: float: The percentage of matches within the top 3 compared to the total count. \"\"\" return self . percentage_rank ( self . top3 )","title":"percentage_top3()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_top5","text":"Calculate the percentage of matches within the top 5. Returns: Name Type Description float float The percentage of matches within the top 5 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 93 94 95 96 97 98 99 100 def percentage_top5 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 5. Returns: float: The percentage of matches within the top 5 compared to the total count. \"\"\" return self . percentage_rank ( self . top5 )","title":"percentage_top5()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.precision_at_k","text":"Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. 
Parameters: Name Type Description Default k int The number of top predictions to consider. required Returns: Name Type Description float float The precision at k, ranging from 0.0 to 1.0. float A higher precision indicates a better performance in identifying relevant items in the top-k predictions. Source code in src/pheval/analyse/rank_stats.py 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 def precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Args: k (int): The number of top predictions to consider. Returns: float: The precision at k, ranging from 0.0 to 1.0. A higher precision indicates a better performance in identifying relevant items in the top-k predictions. \"\"\" k_attr = getattr ( self , f \"top { k } \" ) if k > 1 else self . top return k_attr / ( self . total * k )","title":"precision_at_k()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank","text":"Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: Name Type Description float float The Mean Reciprocal Rank value. Source code in src/pheval/analyse/rank_stats.py 153 154 155 156 157 158 159 160 161 162 163 164 165 166 def return_mean_reciprocal_rank ( self ) -> float : \"\"\" Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: float: The Mean Reciprocal Rank value. \"\"\" if self . mrr is not None : return self . 
mrr else : return self . mean_reciprocal_rank ()","title":"return_mean_reciprocal_rank()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStatsWriter","text":"Class for writing the rank stats to a file. Source code in src/pheval/analyse/rank_stats.py 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 class RankStatsWriter : \"\"\"Class for writing the rank stats to a file.\"\"\" def __init__ ( self , file : Path ): \"\"\" Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written \"\"\" self . file = open ( file , \"w\" ) self . writer = csv . writer ( self . file , delimiter = \" \\t \" ) self . writer . 
writerow ( [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , \"percentage_top\" , \"percentage_top3\" , \"percentage_top5\" , \"percentage_top10\" , \"percentage_found\" , \"precision@1\" , \"precision@3\" , \"precision@5\" , \"precision@10\" , \"MAP@1\" , \"MAP@3\" , \"MAP@5\" , \"MAP@10\" , \"f_beta_score@1\" , \"f_beta_score@3\" , \"f_beta_score@5\" , \"f_beta_score@10\" , \"NDCG@3\" , \"NDCG@5\" , \"NDCG@10\" , \"true_positives\" , \"false_positives\" , \"true_negatives\" , \"false_negatives\" , \"sensitivity\" , \"specificity\" , \"precision\" , \"negative_predictive_value\" , \"false_positive_rate\" , \"false_discovery_rate\" , \"false_negative_rate\" , \"accuracy\" , \"f1_score\" , \"matthews_correlation_coefficient\" , ] ) def write_row ( self , directory : Path , rank_stats : RankStats , binary_classification : BinaryClassificationStats , ) -> None : \"\"\" Write summary rank statistics row for a run to the file. Args: directory (Path): Path to the results directory corresponding to the run rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run Raises: IOError: If there is an error writing to the file. \"\"\" try : self . writer . writerow ( [ directory , rank_stats . top , rank_stats . top3 , rank_stats . top5 , rank_stats . top10 , rank_stats . found , rank_stats . total , rank_stats . mean_reciprocal_rank (), rank_stats . percentage_top (), rank_stats . percentage_top3 (), rank_stats . percentage_top5 (), rank_stats . percentage_top10 (), rank_stats . percentage_found (), rank_stats . precision_at_k ( 1 ), rank_stats . precision_at_k ( 3 ), rank_stats . precision_at_k ( 5 ), rank_stats . precision_at_k ( 10 ), rank_stats . mean_average_precision_at_k ( 1 ), rank_stats . mean_average_precision_at_k ( 3 ), rank_stats . mean_average_precision_at_k ( 5 ), rank_stats . mean_average_precision_at_k ( 10 ), rank_stats . 
f_beta_score_at_k ( rank_stats . percentage_top (), 1 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top3 (), 3 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top5 (), 5 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top10 (), 10 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 3 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 5 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 10 ), binary_classification . true_positives , binary_classification . false_positives , binary_classification . true_negatives , binary_classification . false_negatives , binary_classification . sensitivity (), binary_classification . specificity (), binary_classification . precision (), binary_classification . negative_predictive_value (), binary_classification . false_positive_rate (), binary_classification . false_discovery_rate (), binary_classification . false_negative_rate (), binary_classification . accuracy (), binary_classification . f1_score (), binary_classification . matthews_correlation_coefficient (), ] ) except IOError : print ( \"Error writing \" , self . file ) def close ( self ) -> None : \"\"\" Close the file used for writing rank statistics. Raises: IOError: If there's an error while closing the file. \"\"\" try : self . file . close () except IOError : print ( \"Error closing \" , self . 
file )","title":"RankStatsWriter"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStatsWriter.__init__","text":"Initialise the RankStatsWriter class Parameters: Name Type Description Default file Path Path to the file where rank stats will be written required Source code in src/pheval/analyse/rank_stats.py 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 def __init__ ( self , file : Path ): \"\"\" Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written \"\"\" self . file = open ( file , \"w\" ) self . writer = csv . writer ( self . file , delimiter = \" \\t \" ) self . writer . writerow ( [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , \"percentage_top\" , \"percentage_top3\" , \"percentage_top5\" , \"percentage_top10\" , \"percentage_found\" , \"precision@1\" , \"precision@3\" , \"precision@5\" , \"precision@10\" , \"MAP@1\" , \"MAP@3\" , \"MAP@5\" , \"MAP@10\" , \"f_beta_score@1\" , \"f_beta_score@3\" , \"f_beta_score@5\" , \"f_beta_score@10\" , \"NDCG@3\" , \"NDCG@5\" , \"NDCG@10\" , \"true_positives\" , \"false_positives\" , \"true_negatives\" , \"false_negatives\" , \"sensitivity\" , \"specificity\" , \"precision\" , \"negative_predictive_value\" , \"false_positive_rate\" , \"false_discovery_rate\" , \"false_negative_rate\" , \"accuracy\" , \"f1_score\" , \"matthews_correlation_coefficient\" , ] )","title":"__init__()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStatsWriter.close","text":"Close the file used for writing rank statistics. Raises: Type Description IOError If there's an error while closing the file. 
Source code in src/pheval/analyse/rank_stats.py 404 405 406 407 408 409 410 411 412 413 414 def close ( self ) -> None : \"\"\" Close the file used for writing rank statistics. Raises: IOError: If there's an error while closing the file. \"\"\" try : self . file . close () except IOError : print ( \"Error closing \" , self . file )","title":"close()"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStatsWriter.write_row","text":"Write summary rank statistics row for a run to the file. Parameters: Name Type Description Default directory Path Path to the results directory corresponding to the run required rank_stats RankStats RankStats instance containing rank statistics corresponding to the run required Raises: Type Description IOError If there is an error writing to the file. Source code in src/pheval/analyse/rank_stats.py 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 def write_row ( self , directory : Path , rank_stats : RankStats , binary_classification : BinaryClassificationStats , ) -> None : \"\"\" Write summary rank statistics row for a run to the file. Args: directory (Path): Path to the results directory corresponding to the run rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run Raises: IOError: If there is an error writing to the file. \"\"\" try : self . writer . writerow ( [ directory , rank_stats . top , rank_stats . top3 , rank_stats . top5 , rank_stats . top10 , rank_stats . found , rank_stats . total , rank_stats . mean_reciprocal_rank (), rank_stats . percentage_top (), rank_stats . percentage_top3 (), rank_stats . percentage_top5 (), rank_stats . percentage_top10 (), rank_stats . percentage_found (), rank_stats . precision_at_k ( 1 ), rank_stats . 
precision_at_k ( 3 ), rank_stats . precision_at_k ( 5 ), rank_stats . precision_at_k ( 10 ), rank_stats . mean_average_precision_at_k ( 1 ), rank_stats . mean_average_precision_at_k ( 3 ), rank_stats . mean_average_precision_at_k ( 5 ), rank_stats . mean_average_precision_at_k ( 10 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top (), 1 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top3 (), 3 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top5 (), 5 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top10 (), 10 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 3 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 5 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 10 ), binary_classification . true_positives , binary_classification . false_positives , binary_classification . true_negatives , binary_classification . false_negatives , binary_classification . sensitivity (), binary_classification . specificity (), binary_classification . precision (), binary_classification . negative_predictive_value (), binary_classification . false_positive_rate (), binary_classification . false_discovery_rate (), binary_classification . false_negative_rate (), binary_classification . accuracy (), binary_classification . f1_score (), binary_classification . matthews_correlation_coefficient (), ] ) except IOError : print ( \"Error writing \" , self . file )","title":"write_row()"},{"location":"api/pheval/analyse/run_data_parser/","text":"TrackInputOutputDirectories dataclass Track the input phenopacket test data for a corresponding pheval output directory. Attributes: Name Type Description phenopacket_dir Path The directory containing input phenopackets. results_dir Path The directory containing output results from pheval. 
Source code in src/pheval/analyse/run_data_parser.py 8 9 10 11 12 13 14 15 16 17 18 19 @dataclass class TrackInputOutputDirectories : \"\"\" Track the input phenopacket test data for a corresponding pheval output directory. Attributes: phenopacket_dir (Path): The directory containing input phenopackets. results_dir (Path): The directory containing output results from pheval. \"\"\" phenopacket_dir : Path results_dir : Path parse_run_data_text_file ( run_data_path ) Parse run data .txt file returning a list of input phenopacket and corresponding output directories. Parameters: Name Type Description Default run_data_path Path The path to the run data .txt file. required Returns: Type Description List [ TrackInputOutputDirectories ] List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing List [ TrackInputOutputDirectories ] input test data directories and their corresponding output directories. Notes The run data .txt file should be formatted with tab-separated values. Each row should contain two columns: the first column representing the input test data phenopacket directory, and the second column representing the corresponding run output directory. Source code in src/pheval/analyse/run_data_parser.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 def parse_run_data_text_file ( run_data_path : Path ) -> List [ TrackInputOutputDirectories ]: \"\"\" Parse run data .txt file returning a list of input phenopacket and corresponding output directories. Args: run_data_path (Path): The path to the run data .txt file. Returns: List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing input test data directories and their corresponding output directories. Notes: The run data .txt file should be formatted with tab-separated values. 
Each row should contain two columns: the first column representing the input test data phenopacket directory, and the second column representing the corresponding run output directory. \"\"\" run_data = pd . read_csv ( run_data_path , delimiter = \" \\t \" , header = None ) run_data_list = [] for _index , row in run_data . iterrows (): run_data_list . append ( TrackInputOutputDirectories ( phenopacket_dir = Path ( row [ 0 ]), results_dir = Path ( row [ 1 ])) ) return run_data_list","title":"Run data parser"},{"location":"api/pheval/analyse/run_data_parser/#src.pheval.analyse.run_data_parser.TrackInputOutputDirectories","text":"Track the input phenopacket test data for a corresponding pheval output directory. Attributes: Name Type Description phenopacket_dir Path The directory containing input phenopackets. results_dir Path The directory containing output results from pheval. Source code in src/pheval/analyse/run_data_parser.py 8 9 10 11 12 13 14 15 16 17 18 19 @dataclass class TrackInputOutputDirectories : \"\"\" Track the input phenopacket test data for a corresponding pheval output directory. Attributes: phenopacket_dir (Path): The directory containing input phenopackets. results_dir (Path): The directory containing output results from pheval. \"\"\" phenopacket_dir : Path results_dir : Path","title":"TrackInputOutputDirectories"},{"location":"api/pheval/analyse/run_data_parser/#src.pheval.analyse.run_data_parser.parse_run_data_text_file","text":"Parse run data .txt file returning a list of input phenopacket and corresponding output directories. Parameters: Name Type Description Default run_data_path Path The path to the run data .txt file. required Returns: Type Description List [ TrackInputOutputDirectories ] List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing List [ TrackInputOutputDirectories ] input test data directories and their corresponding output directories. 
Notes The run data .txt file should be formatted with tab-separated values. Each row should contain two columns: the first column representing the input test data phenopacket directory, and the second column representing the corresponding run output directory. Source code in src/pheval/analyse/run_data_parser.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 def parse_run_data_text_file ( run_data_path : Path ) -> List [ TrackInputOutputDirectories ]: \"\"\" Parse run data .txt file returning a list of input phenopacket and corresponding output directories. Args: run_data_path (Path): The path to the run data .txt file. Returns: List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing input test data directories and their corresponding output directories. Notes: The run data .txt file should be formatted with tab-separated values. Each row should contain two columns: the first column representing the input test data phenopacket directory, and the second column representing the corresponding run output directory. \"\"\" run_data = pd . read_csv ( run_data_path , delimiter = \" \\t \" , header = None ) run_data_list = [] for _index , row in run_data . iterrows (): run_data_list . append ( TrackInputOutputDirectories ( phenopacket_dir = Path ( row [ 0 ]), results_dir = Path ( row [ 1 ])) ) return run_data_list","title":"parse_run_data_text_file()"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/","text":"AssessVariantPrioritisation Class for assessing variant prioritisation based on thresholds and scoring orders. 
Source code in src/pheval/analyse/variant_prioritisation_analysis.py 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 class AssessVariantPrioritisation : \"\"\"Class for assessing variant prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_variant_results : List [ RankedPhEvalVariantResult ], threshold : float , score_order : str , proband_causative_variants : List [ GenomicVariant ], ): \"\"\" Initialise AssessVariantPrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_variant_results (List[RankedPhEvalVariantResult]): List of ranked PhEval variant results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_variants (List[GenomicVariant]): List of proband variants \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_variant_results = standardised_variant_results self . threshold = threshold self . score_order = score_order self . 
proband_causative_variants = proband_causative_variants def _record_variant_prioritisation_match ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats , ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if found within the results Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return VariantPrioritisationResult ( self . phenopacket_path , GenomicVariant ( chrom = result_entry . chromosome , pos = result_entry . start , ref = result_entry . ref , alt = result_entry . alt , ), rank , ) def _assess_variant_with_threshold_ascending_order ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if it meets the ascending order threshold. This method checks if the variant prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the variant rank. Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_variant_prioritisation_match ( result_entry , rank_stats ) def _assess_variant_with_threshold ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if it meets the score threshold. This method checks if the variant prioritisation rank meets the score threshold. If the score of the result entry is greater than the threshold, it records the variant rank. 
Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_variant_prioritisation_match ( result_entry , rank_stats ) def _record_matched_variant ( self , rank_stats : RankStats , standardised_variant_result : RankedPhEvalVariantResult ) -> VariantPrioritisationResult : \"\"\" Return the variant rank result - handling the specification of a threshold. This method determines and returns the variant rank result based on the specified threshold and score order. If the threshold is 0.0, it records the variant rank directly. Otherwise, it assesses the variant with the threshold based on the score order. Args: rank_stats (RankStats): RankStats class instance standardised_variant_result (RankedPhEvalVariantResult): Ranked PhEval variant result entry Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_variant_prioritisation_match ( standardised_variant_result , rank_stats ) else : return ( self . _assess_variant_with_threshold ( standardised_variant_result , rank_stats ) if self . score_order != \"ascending\" else self . _assess_variant_with_threshold_ascending_order ( standardised_variant_result , rank_stats ) ) def assess_variant_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. 
binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for variant in self . proband_causative_variants : rank_stats . total += 1 variant_match = VariantPrioritisationResult ( self . phenopacket_path , variant ) for result in self . standardised_variant_results : result_variant = GenomicVariant ( chrom = str ( result . chromosome ), pos = int ( result . start ), ref = result . ref , alt = result . alt , ) if variant == result_variant : variant_match = self . _record_matched_variant ( rank_stats , result ) ( relevant_ranks . append ( variant_match . rank ) if variant_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( VariantPrioritisationResult ( self . phenopacket_path , variant ) if variant_match is None else variant_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . 
standardised_variant_results , relevant_ranks ) __init__ ( phenopacket_path , results_dir , standardised_variant_results , threshold , score_order , proband_causative_variants ) Initialise AssessVariantPrioritisation class Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_variant_results List [ RankedPhEvalVariantResult ] List of ranked PhEval variant results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_causative_variants List [ GenomicVariant ] List of proband variants required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_variant_results : List [ RankedPhEvalVariantResult ], threshold : float , score_order : str , proband_causative_variants : List [ GenomicVariant ], ): \"\"\" Initialise AssessVariantPrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_variant_results (List[RankedPhEvalVariantResult]): List of ranked PhEval variant results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_variants (List[GenomicVariant]): List of proband variants \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_variant_results = standardised_variant_results self . threshold = threshold self . score_order = score_order self . proband_causative_variants = proband_causative_variants assess_variant_prioritisation ( rank_stats , rank_records , binary_classification_stats ) Assess variant prioritisation. 
This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 def assess_variant_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for variant in self . proband_causative_variants : rank_stats . total += 1 variant_match = VariantPrioritisationResult ( self . phenopacket_path , variant ) for result in self . standardised_variant_results : result_variant = GenomicVariant ( chrom = str ( result . chromosome ), pos = int ( result . start ), ref = result . ref , alt = result . alt , ) if variant == result_variant : variant_match = self . _record_matched_variant ( rank_stats , result ) ( relevant_ranks . append ( variant_match . rank ) if variant_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( VariantPrioritisationResult ( self . 
phenopacket_path , variant ) if variant_match is None else variant_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . standardised_variant_results , relevant_ranks ) assess_phenopacket_variant_prioritisation ( phenopacket_path , score_order , results_dir_and_input , threshold , variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats ) Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required variant_rank_stats RankStats RankStats class instance. required variant_rank_comparison defaultdict Default dictionary for variant rank comparisons. required variant_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 def assess_phenopacket_variant_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , variant_rank_stats : RankStats , variant_rank_comparison : defaultdict , variant_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket. 
Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. variant_rank_stats (RankStats): RankStats class instance. variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons. variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" proband_causative_variants = _obtain_causative_variants ( phenopacket_path ) standardised_variant_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_variant_results/ { phenopacket_path . stem } -pheval_variant_result.tsv\" ) pheval_variant_result = read_standardised_result ( standardised_variant_result ) AssessVariantPrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_variant_results/\" ), parse_pheval_result ( RankedPhEvalVariantResult , pheval_variant_result ), threshold , score_order , proband_causative_variants , ) . assess_variant_prioritisation ( variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats ) benchmark_variant_prioritisation ( results_directory_and_input , score_order , threshold , variant_rank_comparison ) Benchmark a directory based on variant prioritisation results. Parameters: Name Type Description Default results_directory_and_input TrackInputOutputDirectories Input and output directories. required score_order str The order in which scores are arranged. required threshold float Threshold for assessment. required variant_rank_comparison defaultdict Default dictionary for variant rank comparisons. required Returns: Name Type Description BenchmarkRunResults An object containing benchmarking results for variant prioritisation, including ranks and rank statistics for the benchmarked directory. 
Source code in src/pheval/analyse/variant_prioritisation_analysis.py 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 def benchmark_variant_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , variant_rank_comparison : defaultdict , ): \"\"\" Benchmark a directory based on variant prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for variant prioritisation, including ranks and rank statistics for the benchmarked directory. \"\"\" variant_rank_stats = RankStats () variant_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_variant_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = variant_rank_comparison , rank_stats = variant_rank_stats , binary_classification_stats = variant_binary_classification_stats , )","title":"Variant prioritisation analysis"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation","text":"Class for assessing variant prioritisation based on thresholds and scoring orders. 
Source code in src/pheval/analyse/variant_prioritisation_analysis.py 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 class AssessVariantPrioritisation : \"\"\"Class for assessing variant prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_variant_results : List [ RankedPhEvalVariantResult ], threshold : float , score_order : str , proband_causative_variants : List [ GenomicVariant ], ): \"\"\" Initialise AssessVariantPrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_variant_results (List[RankedPhEvalVariantResult]): List of ranked PhEval variant results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_variants (List[GenomicVariant]): List of proband variants \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_variant_results = standardised_variant_results self . threshold = threshold self . score_order = score_order self . 
proband_causative_variants = proband_causative_variants def _record_variant_prioritisation_match ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats , ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if found within the results Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return VariantPrioritisationResult ( self . phenopacket_path , GenomicVariant ( chrom = result_entry . chromosome , pos = result_entry . start , ref = result_entry . ref , alt = result_entry . alt , ), rank , ) def _assess_variant_with_threshold_ascending_order ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if it meets the ascending order threshold. This method checks if the variant prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the variant rank. Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_variant_prioritisation_match ( result_entry , rank_stats ) def _assess_variant_with_threshold ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if it meets the score threshold. This method checks if the variant prioritisation rank meets the score threshold. If the score of the result entry is greater than the threshold, it records the variant rank. 
Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_variant_prioritisation_match ( result_entry , rank_stats ) def _record_matched_variant ( self , rank_stats : RankStats , standardised_variant_result : RankedPhEvalVariantResult ) -> VariantPrioritisationResult : \"\"\" Return the variant rank result - handling the specification of a threshold. This method determines and returns the variant rank result based on the specified threshold and score order. If the threshold is 0.0, it records the variant rank directly. Otherwise, it assesses the variant with the threshold based on the score order. Args: rank_stats (RankStats): RankStats class instance standardised_variant_result (RankedPhEvalVariantResult): Ranked PhEval variant result entry Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_variant_prioritisation_match ( standardised_variant_result , rank_stats ) else : return ( self . _assess_variant_with_threshold ( standardised_variant_result , rank_stats ) if self . score_order != \"ascending\" else self . _assess_variant_with_threshold_ascending_order ( standardised_variant_result , rank_stats ) ) def assess_variant_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. 
binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for variant in self . proband_causative_variants : rank_stats . total += 1 variant_match = VariantPrioritisationResult ( self . phenopacket_path , variant ) for result in self . standardised_variant_results : result_variant = GenomicVariant ( chrom = str ( result . chromosome ), pos = int ( result . start ), ref = result . ref , alt = result . alt , ) if variant == result_variant : variant_match = self . _record_matched_variant ( rank_stats , result ) ( relevant_ranks . append ( variant_match . rank ) if variant_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( VariantPrioritisationResult ( self . phenopacket_path , variant ) if variant_match is None else variant_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . 
standardised_variant_results , relevant_ranks )","title":"AssessVariantPrioritisation"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.__init__","text":"Initialise AssessVariantPrioritisation class Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_variant_results List [ RankedPhEvalVariantResult ] List of ranked PhEval variant results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_causative_variants List [ GenomicVariant ] List of proband variants required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_variant_results : List [ RankedPhEvalVariantResult ], threshold : float , score_order : str , proband_causative_variants : List [ GenomicVariant ], ): \"\"\" Initialise AssessVariantPrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_variant_results (List[RankedPhEvalVariantResult]): List of ranked PhEval variant results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_variants (List[GenomicVariant]): List of proband variants \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_variant_results = standardised_variant_results self . threshold = threshold self . score_order = score_order self . 
proband_causative_variants = proband_causative_variants","title":"__init__()"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.assess_variant_prioritisation","text":"Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 def assess_variant_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for variant in self . proband_causative_variants : rank_stats . total += 1 variant_match = VariantPrioritisationResult ( self . phenopacket_path , variant ) for result in self . standardised_variant_results : result_variant = GenomicVariant ( chrom = str ( result . chromosome ), pos = int ( result . start ), ref = result . ref , alt = result . alt , ) if variant == result_variant : variant_match = self . 
_record_matched_variant ( rank_stats , result ) ( relevant_ranks . append ( variant_match . rank ) if variant_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( VariantPrioritisationResult ( self . phenopacket_path , variant ) if variant_match is None else variant_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . standardised_variant_results , relevant_ranks )","title":"assess_variant_prioritisation()"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_variant_prioritisation","text":"Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required variant_rank_stats RankStats RankStats class instance. required variant_rank_comparison defaultdict Default dictionary for variant rank comparisons. required variant_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. 
required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 def assess_phenopacket_variant_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , variant_rank_stats : RankStats , variant_rank_comparison : defaultdict , variant_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket. Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. variant_rank_stats (RankStats): RankStats class instance. variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons. variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" proband_causative_variants = _obtain_causative_variants ( phenopacket_path ) standardised_variant_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_variant_results/ { phenopacket_path . stem } -pheval_variant_result.tsv\" ) pheval_variant_result = read_standardised_result ( standardised_variant_result ) AssessVariantPrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_variant_results/\" ), parse_pheval_result ( RankedPhEvalVariantResult , pheval_variant_result ), threshold , score_order , proband_causative_variants , ) . 
assess_variant_prioritisation ( variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats )","title":"assess_phenopacket_variant_prioritisation()"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_prioritisation","text":"Benchmark a directory based on variant prioritisation results. Parameters: Name Type Description Default results_directory_and_input TrackInputOutputDirectories Input and output directories. required score_order str The order in which scores are arranged. required threshold float Threshold for assessment. required variant_rank_comparison defaultdict Default dictionary for variant rank comparisons. required Returns: Name Type Description BenchmarkRunResults An object containing benchmarking results for variant prioritisation, including ranks and rank statistics for the benchmarked directory. Source code in src/pheval/analyse/variant_prioritisation_analysis.py 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 def benchmark_variant_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , variant_rank_comparison : defaultdict , ): \"\"\" Benchmark a directory based on variant prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for variant prioritisation, including ranks and rank statistics for the benchmarked directory. 
\"\"\" variant_rank_stats = RankStats () variant_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_variant_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = variant_rank_comparison , rank_stats = variant_rank_stats , binary_classification_stats = variant_binary_classification_stats , )","title":"benchmark_variant_prioritisation()"},{"location":"api/pheval/infra/exomiserdb/","text":"DBConnection Source code in src/pheval/infra/exomiserdb.py 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 class DBConnection : connection = None def __init__ ( self , connection ): DBConnection . connection = connection @classmethod def get_connection ( cls ) -> jaydebeapi . Connection : \"\"\"Creates return new Singleton database connection\"\"\" return DBConnection . connection def close ( self ): return self . connection . close () @classmethod def get_cursor ( cls ) -> jaydebeapi . Cursor : connection = cls . get_connection () return connection . cursor () get_connection () classmethod Creates return new Singleton database connection Source code in src/pheval/infra/exomiserdb.py 49 50 51 52 @classmethod def get_connection ( cls ) -> jaydebeapi . Connection : \"\"\"Creates return new Singleton database connection\"\"\" return DBConnection . connection DBConnector Source code in src/pheval/infra/exomiserdb.py 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 class DBConnector : def __init__ ( self , jar : Path , driver : str , server : str , database : str , user : str , password : str ): self . jar = jar self . driver = driver self . server = server self . database = database self . user = user self . 
password = password self . dbconn = None def create_connection ( self ) -> jaydebeapi . Connection : \"\"\"creates h2 database connection\"\"\" return jaydebeapi . connect ( self . driver , f \" { self . server }{ self . database } \" , [ self . user , self . password ], self . jar , ) def __enter__ ( self ) -> jaydebeapi . Connection : self . dbconn = self . create_connection () return self . dbconn def __exit__ ( self , * other ): self . dbconn . close () create_connection () creates h2 database connection Source code in src/pheval/infra/exomiserdb.py 26 27 28 29 30 31 32 33 def create_connection ( self ) -> jaydebeapi . Connection : \"\"\"creates h2 database connection\"\"\" return jaydebeapi . connect ( self . driver , f \" { self . server }{ self . database } \" , [ self . user , self . password ], self . jar , ) ExomiserDB Source code in src/pheval/infra/exomiserdb.py 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 class ExomiserDB : def __init__ ( self , db_path : Path ): try : self . connector = DBConnector ( # noqa jar = os . path . join ( os . path . dirname ( __file__ ), \"../../../lib/h2-1.4.199.jar\" ), driver = \"org.h2.Driver\" , server = f \"jdbc:h2: { db_path } \" , user = \"sa\" , password = \"\" , database = \"\" , ) except Exception as e : print ( \"An exception occurred\" , e ) def import_from_semsim_file ( self , input_file : Path , subject_prefix : str , object_prefix : str ): \"\"\"imports semsim tsv profile into exomiser phenotype database Args: input_file (Path): semsim profile subject_prefix (str): Subject Prefix. e.g HP object_prefix (str): Object Prefix. e.g MP \"\"\" with self . connector as cnn : conn = DBConnection ( cnn ) reader = pl . read_csv_batched ( input_file , separator = \" \\t \" ) batch_length = 5 batches = reader . next_batches ( batch_length ) cursor = conn . 
get_cursor () # # TODO: Refactor this with open ( input_file , \"r\" ) as f : total = sum ( 1 for line in f ) pbar = tqdm ( total = total - 1 ) mapping_id = 1 while batches : input_data = pl . concat ( batches ) sql = _semsim2h2 ( input_data , object_prefix , subject_prefix , mapping_id = mapping_id ) cursor . execute ( sql ) len_input_data = len ( input_data ) mapping_id += len_input_data pbar . update ( len_input_data ) batches = reader . next_batches ( batch_length ) import_from_semsim_file ( input_file , subject_prefix , object_prefix ) imports semsim tsv profile into exomiser phenotype database Parameters: Name Type Description Default input_file Path semsim profile required subject_prefix str Subject Prefix. e.g HP required object_prefix str Object Prefix. e.g MP required Source code in src/pheval/infra/exomiserdb.py 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 def import_from_semsim_file ( self , input_file : Path , subject_prefix : str , object_prefix : str ): \"\"\"imports semsim tsv profile into exomiser phenotype database Args: input_file (Path): semsim profile subject_prefix (str): Subject Prefix. e.g HP object_prefix (str): Object Prefix. e.g MP \"\"\" with self . connector as cnn : conn = DBConnection ( cnn ) reader = pl . read_csv_batched ( input_file , separator = \" \\t \" ) batch_length = 5 batches = reader . next_batches ( batch_length ) cursor = conn . get_cursor () # # TODO: Refactor this with open ( input_file , \"r\" ) as f : total = sum ( 1 for line in f ) pbar = tqdm ( total = total - 1 ) mapping_id = 1 while batches : input_data = pl . concat ( batches ) sql = _semsim2h2 ( input_data , object_prefix , subject_prefix , mapping_id = mapping_id ) cursor . execute ( sql ) len_input_data = len ( input_data ) mapping_id += len_input_data pbar . update ( len_input_data ) batches = reader . 
next_batches ( batch_length )","title":"Exomiserdb"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.DBConnection","text":"Source code in src/pheval/infra/exomiserdb.py 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 class DBConnection : connection = None def __init__ ( self , connection ): DBConnection . connection = connection @classmethod def get_connection ( cls ) -> jaydebeapi . Connection : \"\"\"Creates return new Singleton database connection\"\"\" return DBConnection . connection def close ( self ): return self . connection . close () @classmethod def get_cursor ( cls ) -> jaydebeapi . Cursor : connection = cls . get_connection () return connection . cursor ()","title":"DBConnection"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.DBConnection.get_connection","text":"Creates return new Singleton database connection Source code in src/pheval/infra/exomiserdb.py 49 50 51 52 @classmethod def get_connection ( cls ) -> jaydebeapi . Connection : \"\"\"Creates return new Singleton database connection\"\"\" return DBConnection . connection","title":"get_connection()"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.DBConnector","text":"Source code in src/pheval/infra/exomiserdb.py 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 class DBConnector : def __init__ ( self , jar : Path , driver : str , server : str , database : str , user : str , password : str ): self . jar = jar self . driver = driver self . server = server self . database = database self . user = user self . password = password self . dbconn = None def create_connection ( self ) -> jaydebeapi . Connection : \"\"\"creates h2 database connection\"\"\" return jaydebeapi . connect ( self . driver , f \" { self . server }{ self . database } \" , [ self . user , self . password ], self . jar , ) def __enter__ ( self ) -> jaydebeapi . Connection : self . dbconn = self . 
create_connection () return self . dbconn def __exit__ ( self , * other ): self . dbconn . close ()","title":"DBConnector"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.DBConnector.create_connection","text":"creates h2 database connection Source code in src/pheval/infra/exomiserdb.py 26 27 28 29 30 31 32 33 def create_connection ( self ) -> jaydebeapi . Connection : \"\"\"creates h2 database connection\"\"\" return jaydebeapi . connect ( self . driver , f \" { self . server }{ self . database } \" , [ self . user , self . password ], self . jar , )","title":"create_connection()"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.ExomiserDB","text":"Source code in src/pheval/infra/exomiserdb.py 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 class ExomiserDB : def __init__ ( self , db_path : Path ): try : self . connector = DBConnector ( # noqa jar = os . path . join ( os . path . dirname ( __file__ ), \"../../../lib/h2-1.4.199.jar\" ), driver = \"org.h2.Driver\" , server = f \"jdbc:h2: { db_path } \" , user = \"sa\" , password = \"\" , database = \"\" , ) except Exception as e : print ( \"An exception occurred\" , e ) def import_from_semsim_file ( self , input_file : Path , subject_prefix : str , object_prefix : str ): \"\"\"imports semsim tsv profile into exomiser phenotype database Args: input_file (Path): semsim profile subject_prefix (str): Subject Prefix. e.g HP object_prefix (str): Object Prefix. e.g MP \"\"\" with self . connector as cnn : conn = DBConnection ( cnn ) reader = pl . read_csv_batched ( input_file , separator = \" \\t \" ) batch_length = 5 batches = reader . next_batches ( batch_length ) cursor = conn . get_cursor () # # TODO: Refactor this with open ( input_file , \"r\" ) as f : total = sum ( 1 for line in f ) pbar = tqdm ( total = total - 1 ) mapping_id = 1 while batches : input_data = pl . 
concat ( batches ) sql = _semsim2h2 ( input_data , object_prefix , subject_prefix , mapping_id = mapping_id ) cursor . execute ( sql ) len_input_data = len ( input_data ) mapping_id += len_input_data pbar . update ( len_input_data ) batches = reader . next_batches ( batch_length )","title":"ExomiserDB"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file","text":"imports semsim tsv profile into exomiser phenotype database Parameters: Name Type Description Default input_file Path semsim profile required subject_prefix str Subject Prefix. e.g HP required object_prefix str Object Prefix. e.g MP required Source code in src/pheval/infra/exomiserdb.py 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 def import_from_semsim_file ( self , input_file : Path , subject_prefix : str , object_prefix : str ): \"\"\"imports semsim tsv profile into exomiser phenotype database Args: input_file (Path): semsim profile subject_prefix (str): Subject Prefix. e.g HP object_prefix (str): Object Prefix. e.g MP \"\"\" with self . connector as cnn : conn = DBConnection ( cnn ) reader = pl . read_csv_batched ( input_file , separator = \" \\t \" ) batch_length = 5 batches = reader . next_batches ( batch_length ) cursor = conn . get_cursor () # # TODO: Refactor this with open ( input_file , \"r\" ) as f : total = sum ( 1 for line in f ) pbar = tqdm ( total = total - 1 ) mapping_id = 1 while batches : input_data = pl . concat ( batches ) sql = _semsim2h2 ( input_data , object_prefix , subject_prefix , mapping_id = mapping_id ) cursor . execute ( sql ) len_input_data = len ( input_data ) mapping_id += len_input_data pbar . update ( len_input_data ) batches = reader . 
next_batches ( batch_length )","title":"import_from_semsim_file()"},{"location":"api/pheval/post_processing/post_processing/","text":"PhEvalDiseaseResult dataclass Bases: PhEvalResult Minimal data required from tool-specific output for disease prioritisation Parameters: Name Type Description Default disease_name str Disease name for the result entry required disease_identifier str Identifier for the disease result entry in the OMIM namespace required score str Score for the disease result entry required Notes While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 @dataclass class PhEvalDiseaseResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
\"\"\" disease_name : str disease_identifier : str score : float PhEvalGeneResult dataclass Bases: PhEvalResult Minimal data required from tool-specific output for gene prioritisation result Parameters: Name Type Description Default gene_symbol Union [ List [ str ], str ] The gene symbol(s) for the result entry required gene_identifier Union [ List [ str ], str ] The ENSEMBL gene identifier(s) for the result entry required score float The score for the gene result entry required Notes While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 @dataclass class PhEvalGeneResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" gene_symbol : Union [ List [ str ], str ] gene_identifier : Union [ List [ str ], str ] score : float PhEvalResult dataclass Base class for PhEval results. Source code in src/pheval/post_processing/post_processing.py 25 26 27 @dataclass class PhEvalResult : \"\"\"Base class for PhEval results.\"\"\" PhEvalVariantResult dataclass Bases: PhEvalResult Minimal data required from tool-specific output for variant prioritisation Parameters: Name Type Description Default chromosome str The chromosome position of the variant recommended to be provided in the following format. 
required start int The start position of the variant required end int The end position of the variant required ref str The reference allele of the variant required alt str The alternate allele of the variant required score float The score for the variant result entry required Notes While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 @dataclass class PhEvalVariantResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
\"\"\" chromosome : str start : int end : int ref : str alt : str score : float RankedPhEvalDiseaseResult dataclass Bases: PhEvalDiseaseResult PhEval disease result with corresponding rank Parameters: Name Type Description Default rank int The rank for the result entry required Source code in src/pheval/post_processing/post_processing.py 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 @dataclass class RankedPhEvalDiseaseResult ( PhEvalDiseaseResult ): \"\"\"PhEval disease result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_disease_result ( pheval_disease_result : PhEvalDiseaseResult , rank : int ): \"\"\"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalDiseaseResult: The result as a RankedPhEvalDiseaseResult \"\"\" return RankedPhEvalDiseaseResult ( disease_name = pheval_disease_result . disease_name , disease_identifier = pheval_disease_result . disease_identifier , score = pheval_disease_result . 
score , rank = rank , ) from_disease_result ( pheval_disease_result , rank ) staticmethod Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Parameters: Name Type Description Default pheval_disease_result PhEvalDiseaseResult The disease result entry required rank int The corresponding rank for the result entry required Returns: Name Type Description RankedPhEvalDiseaseResult The result as a RankedPhEvalDiseaseResult Source code in src/pheval/post_processing/post_processing.py 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 @staticmethod def from_disease_result ( pheval_disease_result : PhEvalDiseaseResult , rank : int ): \"\"\"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalDiseaseResult: The result as a RankedPhEvalDiseaseResult \"\"\" return RankedPhEvalDiseaseResult ( disease_name = pheval_disease_result . disease_name , disease_identifier = pheval_disease_result . disease_identifier , score = pheval_disease_result . 
score , rank = rank , ) RankedPhEvalGeneResult dataclass Bases: PhEvalGeneResult PhEval gene result with corresponding rank Parameters: Name Type Description Default rank int The rank for the result entry required Source code in src/pheval/post_processing/post_processing.py 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 @dataclass class RankedPhEvalGeneResult ( PhEvalGeneResult ): \"\"\"PhEval gene result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_gene_result ( pheval_gene_result : PhEvalGeneResult , rank : int ): \"\"\"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalGeneResult: The result as a RankedPhEvalGeneResult \"\"\" return RankedPhEvalGeneResult ( gene_symbol = pheval_gene_result . gene_symbol , gene_identifier = pheval_gene_result . gene_identifier , score = pheval_gene_result . 
score , rank = rank , ) from_gene_result ( pheval_gene_result , rank ) staticmethod Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Parameters: Name Type Description Default pheval_gene_result PhEvalGeneResult The gene result entry required rank int The corresponding rank for the result entry required Returns: Name Type Description RankedPhEvalGeneResult The result as a RankedPhEvalGeneResult Source code in src/pheval/post_processing/post_processing.py 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 @staticmethod def from_gene_result ( pheval_gene_result : PhEvalGeneResult , rank : int ): \"\"\"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalGeneResult: The result as a RankedPhEvalGeneResult \"\"\" return RankedPhEvalGeneResult ( gene_symbol = pheval_gene_result . gene_symbol , gene_identifier = pheval_gene_result . gene_identifier , score = pheval_gene_result . 
score , rank = rank , ) RankedPhEvalVariantResult dataclass Bases: PhEvalVariantResult PhEval variant result with corresponding rank Parameters: Name Type Description Default rank int The rank for the result entry required Source code in src/pheval/post_processing/post_processing.py 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 @dataclass class RankedPhEvalVariantResult ( PhEvalVariantResult ): \"\"\"PhEval variant result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_variant_result ( pheval_variant_result : PhEvalVariantResult , rank : int ): \"\"\"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalVariantResult: The result as a RankedPhEvalVariantResult \"\"\" return RankedPhEvalVariantResult ( chromosome = pheval_variant_result . chromosome , start = pheval_variant_result . start , end = pheval_variant_result . end , ref = pheval_variant_result . ref , alt = pheval_variant_result . alt , score = pheval_variant_result . 
score , rank = rank , ) from_variant_result ( pheval_variant_result , rank ) staticmethod Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Parameters: Name Type Description Default pheval_variant_result PhEvalVariantResult The variant result entry required rank int The corresponding rank for the result entry required Returns: Name Type Description RankedPhEvalVariantResult The result as a RankedPhEvalVariantResult Source code in src/pheval/post_processing/post_processing.py 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 @staticmethod def from_variant_result ( pheval_variant_result : PhEvalVariantResult , rank : int ): \"\"\"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalVariantResult: The result as a RankedPhEvalVariantResult \"\"\" return RankedPhEvalVariantResult ( chromosome = pheval_variant_result . chromosome , start = pheval_variant_result . start , end = pheval_variant_result . end , ref = pheval_variant_result . ref , alt = pheval_variant_result . alt , score = pheval_variant_result . score , rank = rank , ) ResultSorter Class for sorting PhEvalResult instances based on a given sort order. Source code in src/pheval/post_processing/post_processing.py 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 class ResultSorter : \"\"\"Class for sorting PhEvalResult instances based on a given sort order.\"\"\" def __init__ ( self , pheval_results : [ PhEvalResult ], sort_order : SortOrder ): \"\"\" Initialise ResultSorter Args: pheval_results ([PhEvalResult]): List of PhEvalResult instances to be sorted sort_order (SortOrder): Sorting order to be applied \"\"\" self . pheval_results = pheval_results self . 
sort_order = sort_order def _sort_by_decreasing_score ( self ) -> [ PhEvalResult ]: \"\"\" Sort results in descending order based on the score Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return sorted ( self . pheval_results , key = operator . attrgetter ( \"score\" ), reverse = True ) def _sort_by_increasing_score ( self ) -> [ PhEvalResult ]: \"\"\" Sort results in ascending order based on the score Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return sorted ( self . pheval_results , key = operator . attrgetter ( \"score\" ), reverse = False ) def sort_pheval_results ( self ) -> [ PhEvalResult ]: \"\"\" Sort results based on the specified sort order. Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return ( self . _sort_by_increasing_score () if self . sort_order == SortOrder . ASCENDING else self . _sort_by_decreasing_score () ) __init__ ( pheval_results , sort_order ) Initialise ResultSorter Parameters: Name Type Description Default pheval_results [ PhEvalResult ] List of PhEvalResult instances to be sorted required sort_order SortOrder Sorting order to be applied required Source code in src/pheval/post_processing/post_processing.py 188 189 190 191 192 193 194 195 196 197 def __init__ ( self , pheval_results : [ PhEvalResult ], sort_order : SortOrder ): \"\"\" Initialise ResultSorter Args: pheval_results ([PhEvalResult]): List of PhEvalResult instances to be sorted sort_order (SortOrder): Sorting order to be applied \"\"\" self . pheval_results = pheval_results self . sort_order = sort_order sort_pheval_results () Sort results based on the specified sort order. Returns: Type Description [ PhEvalResult ] [PhEvalResult]: Sorted list of PhEvalResult instances. Source code in src/pheval/post_processing/post_processing.py 217 218 219 220 221 222 223 224 225 226 227 228 def sort_pheval_results ( self ) -> [ PhEvalResult ]: \"\"\" Sort results based on the specified sort order. 
Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return ( self . _sort_by_increasing_score () if self . sort_order == SortOrder . ASCENDING else self . _sort_by_decreasing_score () ) SortOrder Bases: Enum Enumeration representing sorting orders. Source code in src/pheval/post_processing/post_processing.py 176 177 178 179 180 181 182 class SortOrder ( Enum ): \"\"\"Enumeration representing sorting orders.\"\"\" ASCENDING = 1 \"\"\"Ascending sort order.\"\"\" DESCENDING = 2 \"\"\"Descending sort order.\"\"\" ASCENDING = 1 class-attribute Ascending sort order. DESCENDING = 2 class-attribute Descending sort order. calculate_end_pos ( variant_start , variant_ref ) Calculate the end position for a variant Parameters: Name Type Description Default variant_start int The start position of the variant required variant_ref str The reference allele of the variant required Returns: Name Type Description int int The end position of the variant Source code in src/pheval/post_processing/post_processing.py 13 14 15 16 17 18 19 20 21 22 def calculate_end_pos ( variant_start : int , variant_ref : str ) -> int : \"\"\"Calculate the end position for a variant Args: variant_start (int): The start position of the variant variant_ref (str): The reference allele of the variant Returns: int: The end position of the variant \"\"\" return variant_start + len ( variant_ref ) - 1 generate_pheval_result ( pheval_result , sort_order_str , output_dir , tool_result_path ) Generate PhEval variant, gene or disease TSV result based on input results. Parameters: Name Type Description Default pheval_result [ PhEvalResult ] List of PhEvalResult instances to be processed. required sort_order_str str String representation of the desired sorting order. required output_dir Path Path to the output directory. required tool_result_path Path Path to the tool-specific result file. 
required Raises: Type Description ValueError If the results are not all the same type or an error occurs during file writing. Source code in src/pheval/post_processing/post_processing.py 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 def generate_pheval_result ( pheval_result : [ PhEvalResult ], sort_order_str : str , output_dir : Path , tool_result_path : Path , ) -> None : \"\"\" Generate PhEval variant, gene or disease TSV result based on input results. Args: pheval_result ([PhEvalResult]): List of PhEvalResult instances to be processed. sort_order_str (str): String representation of the desired sorting order. output_dir (Path): Path to the output directory. tool_result_path (Path): Path to the tool-specific result file. Raises: ValueError: If the results are not all the same type or an error occurs during file writing. \"\"\" if not pheval_result : info_log . warning ( f \"No results found for { tool_result_path . 
name } \" ) return ranked_pheval_result = _create_pheval_result ( pheval_result , sort_order_str ) if all ( isinstance ( result , PhEvalGeneResult ) for result in pheval_result ): _write_pheval_gene_result ( ranked_pheval_result , output_dir , tool_result_path ) elif all ( isinstance ( result , PhEvalVariantResult ) for result in pheval_result ): _write_pheval_variant_result ( ranked_pheval_result , output_dir , tool_result_path ) elif all ( isinstance ( result , PhEvalDiseaseResult ) for result in pheval_result ): _write_pheval_disease_result ( ranked_pheval_result , output_dir , tool_result_path ) else : raise ValueError ( \"Results are not all of the same type.\" )","title":"Post processing"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalDiseaseResult","text":"Bases: PhEvalResult Minimal data required from tool-specific output for disease prioritisation Parameters: Name Type Description Default disease_name str Disease name for the result entry required disease_identifier str Identifier for the disease result entry in the OMIM namespace required score str Score for the disease result entry required Notes While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/post_processing/post_processing.py 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 @dataclass class PhEvalDiseaseResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str score : float","title":"PhEvalDiseaseResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalGeneResult","text":"Bases: PhEvalResult Minimal data required from tool-specific output for gene prioritisation result Parameters: Name Type Description Default gene_symbol Union [ List [ str ], str ] The gene symbol(s) for the result entry required gene_identifier Union [ List [ str ], str ] The ENSEMBL gene identifier(s) for the result entry required score float The score for the gene result entry required Notes While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/post_processing/post_processing.py 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 @dataclass class PhEvalGeneResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" gene_symbol : Union [ List [ str ], str ] gene_identifier : Union [ List [ str ], str ] score : float","title":"PhEvalGeneResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalResult","text":"Base class for PhEval results. Source code in src/pheval/post_processing/post_processing.py 25 26 27 @dataclass class PhEvalResult : \"\"\"Base class for PhEval results.\"\"\"","title":"PhEvalResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalVariantResult","text":"Bases: PhEvalResult Minimal data required from tool-specific output for variant prioritisation Parameters: Name Type Description Default chromosome str The chromosome position of the variant recommended to be provided in the following format. required start int The start position of the variant required end int The end position of the variant required ref str The reference allele of the variant required alt str The alternate allele of the variant required score float The score for the variant result entry required Notes While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/post_processing/post_processing.py 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 @dataclass class PhEvalVariantResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" chromosome : str start : int end : int ref : str alt : str score : float","title":"PhEvalVariantResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult","text":"Bases: PhEvalDiseaseResult PhEval disease result with corresponding rank Parameters: Name Type Description Default rank int The rank for the result entry required Source code in src/pheval/post_processing/post_processing.py 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 @dataclass class RankedPhEvalDiseaseResult ( PhEvalDiseaseResult ): \"\"\"PhEval disease result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_disease_result ( pheval_disease_result : PhEvalDiseaseResult , rank : int ): \"\"\"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank 
(int): The corresponding rank for the result entry Returns: RankedPhEvalDiseaseResult: The result as a RankedPhEvalDiseaseResult \"\"\" return RankedPhEvalDiseaseResult ( disease_name = pheval_disease_result . disease_name , disease_identifier = pheval_disease_result . disease_identifier , score = pheval_disease_result . score , rank = rank , )","title":"RankedPhEvalDiseaseResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.from_disease_result","text":"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Parameters: Name Type Description Default pheval_disease_result PhEvalDiseaseResult The disease result entry required rank int The corresponding rank for the result entry required Returns: Name Type Description RankedPhEvalDiseaseResult The result as a RankedPhEvalDiseaseResult Source code in src/pheval/post_processing/post_processing.py 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 @staticmethod def from_disease_result ( pheval_disease_result : PhEvalDiseaseResult , rank : int ): \"\"\"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalDiseaseResult: The result as a RankedPhEvalDiseaseResult \"\"\" return RankedPhEvalDiseaseResult ( disease_name = pheval_disease_result . disease_name , disease_identifier = pheval_disease_result . disease_identifier , score = pheval_disease_result . 
score , rank = rank , )","title":"from_disease_result()"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalGeneResult","text":"Bases: PhEvalGeneResult PhEval gene result with corresponding rank Parameters: Name Type Description Default rank int The rank for the result entry required Source code in src/pheval/post_processing/post_processing.py 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 @dataclass class RankedPhEvalGeneResult ( PhEvalGeneResult ): \"\"\"PhEval gene result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_gene_result ( pheval_gene_result : PhEvalGeneResult , rank : int ): \"\"\"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalGeneResult: The result as a RankedPhEvalGeneResult \"\"\" return RankedPhEvalGeneResult ( gene_symbol = pheval_gene_result . gene_symbol , gene_identifier = pheval_gene_result . gene_identifier , score = pheval_gene_result . 
score , rank = rank , )","title":"RankedPhEvalGeneResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_gene_result","text":"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Parameters: Name Type Description Default pheval_gene_result PhEvalGeneResult The gene result entry required rank int The corresponding rank for the result entry required Returns: Name Type Description RankedPhEvalGeneResult The result as a RankedPhEvalGeneResult Source code in src/pheval/post_processing/post_processing.py 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 @staticmethod def from_gene_result ( pheval_gene_result : PhEvalGeneResult , rank : int ): \"\"\"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalGeneResult: The result as a RankedPhEvalGeneResult \"\"\" return RankedPhEvalGeneResult ( gene_symbol = pheval_gene_result . gene_symbol , gene_identifier = pheval_gene_result . gene_identifier , score = pheval_gene_result . 
score , rank = rank , )","title":"from_gene_result()"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalVariantResult","text":"Bases: PhEvalVariantResult PhEval variant result with corresponding rank Parameters: Name Type Description Default rank int The rank for the result entry required Source code in src/pheval/post_processing/post_processing.py 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 @dataclass class RankedPhEvalVariantResult ( PhEvalVariantResult ): \"\"\"PhEval variant result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_variant_result ( pheval_variant_result : PhEvalVariantResult , rank : int ): \"\"\"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalVariantResult: The result as a RankedPhEvalVariantResult \"\"\" return RankedPhEvalVariantResult ( chromosome = pheval_variant_result . chromosome , start = pheval_variant_result . start , end = pheval_variant_result . end , ref = pheval_variant_result . ref , alt = pheval_variant_result . alt , score = pheval_variant_result . 
score , rank = rank , )","title":"RankedPhEvalVariantResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.from_variant_result","text":"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Parameters: Name Type Description Default pheval_variant_result PhEvalVariantResult The variant result entry required rank int The corresponding rank for the result entry required Returns: Name Type Description RankedPhEvalVariantResult The result as a RankedPhEvalVariantResult Source code in src/pheval/post_processing/post_processing.py 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 @staticmethod def from_variant_result ( pheval_variant_result : PhEvalVariantResult , rank : int ): \"\"\"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalVariantResult: The result as a RankedPhEvalVariantResult \"\"\" return RankedPhEvalVariantResult ( chromosome = pheval_variant_result . chromosome , start = pheval_variant_result . start , end = pheval_variant_result . end , ref = pheval_variant_result . ref , alt = pheval_variant_result . alt , score = pheval_variant_result . score , rank = rank , )","title":"from_variant_result()"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.ResultSorter","text":"Class for sorting PhEvalResult instances based on a given sort order. 
Source code in src/pheval/post_processing/post_processing.py 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 class ResultSorter : \"\"\"Class for sorting PhEvalResult instances based on a given sort order.\"\"\" def __init__ ( self , pheval_results : [ PhEvalResult ], sort_order : SortOrder ): \"\"\" Initialise ResultSorter Args: pheval_results ([PhEvalResult]): List of PhEvalResult instances to be sorted sort_order (SortOrder): Sorting order to be applied \"\"\" self . pheval_results = pheval_results self . sort_order = sort_order def _sort_by_decreasing_score ( self ) -> [ PhEvalResult ]: \"\"\" Sort results in descending order based on the score Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return sorted ( self . pheval_results , key = operator . attrgetter ( \"score\" ), reverse = True ) def _sort_by_increasing_score ( self ) -> [ PhEvalResult ]: \"\"\" Sort results in ascending order based on the score Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return sorted ( self . pheval_results , key = operator . attrgetter ( \"score\" ), reverse = False ) def sort_pheval_results ( self ) -> [ PhEvalResult ]: \"\"\" Sort results based on the specified sort order. Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return ( self . _sort_by_increasing_score () if self . sort_order == SortOrder . ASCENDING else self . 
_sort_by_decreasing_score () )","title":"ResultSorter"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.ResultSorter.__init__","text":"Initialise ResultSorter Parameters: Name Type Description Default pheval_results [ PhEvalResult ] List of PhEvalResult instances to be sorted required sort_order SortOrder Sorting order to be applied required Source code in src/pheval/post_processing/post_processing.py 188 189 190 191 192 193 194 195 196 197 def __init__ ( self , pheval_results : [ PhEvalResult ], sort_order : SortOrder ): \"\"\" Initialise ResultSorter Args: pheval_results ([PhEvalResult]): List of PhEvalResult instances to be sorted sort_order (SortOrder): Sorting order to be applied \"\"\" self . pheval_results = pheval_results self . sort_order = sort_order","title":"__init__()"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_results","text":"Sort results based on the specified sort order. Returns: Type Description [ PhEvalResult ] [PhEvalResult]: Sorted list of PhEvalResult instances. Source code in src/pheval/post_processing/post_processing.py 217 218 219 220 221 222 223 224 225 226 227 228 def sort_pheval_results ( self ) -> [ PhEvalResult ]: \"\"\" Sort results based on the specified sort order. Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return ( self . _sort_by_increasing_score () if self . sort_order == SortOrder . ASCENDING else self . _sort_by_decreasing_score () )","title":"sort_pheval_results()"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.SortOrder","text":"Bases: Enum Enumeration representing sorting orders. 
Source code in src/pheval/post_processing/post_processing.py 176 177 178 179 180 181 182 class SortOrder ( Enum ): \"\"\"Enumeration representing sorting orders.\"\"\" ASCENDING = 1 \"\"\"Ascending sort order.\"\"\" DESCENDING = 2 \"\"\"Descending sort order.\"\"\"","title":"SortOrder"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.SortOrder.ASCENDING","text":"Ascending sort order.","title":"ASCENDING"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.SortOrder.DESCENDING","text":"Descending sort order.","title":"DESCENDING"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.calculate_end_pos","text":"Calculate the end position for a variant Parameters: Name Type Description Default variant_start int The start position of the variant required variant_ref str The reference allele of the variant required Returns: Name Type Description int int The end position of the variant Source code in src/pheval/post_processing/post_processing.py 13 14 15 16 17 18 19 20 21 22 def calculate_end_pos ( variant_start : int , variant_ref : str ) -> int : \"\"\"Calculate the end position for a variant Args: variant_start (int): The start position of the variant variant_ref (str): The reference allele of the variant Returns: int: The end position of the variant \"\"\" return variant_start + len ( variant_ref ) - 1","title":"calculate_end_pos()"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.generate_pheval_result","text":"Generate PhEval variant, gene or disease TSV result based on input results. Parameters: Name Type Description Default pheval_result [ PhEvalResult ] List of PhEvalResult instances to be processed. required sort_order_str str String representation of the desired sorting order. required output_dir Path Path to the output directory. 
required tool_result_path Path Path to the tool-specific result file. required Raises: Type Description ValueError If the results are not all the same type or an error occurs during file writing. Source code in src/pheval/post_processing/post_processing.py 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 def generate_pheval_result ( pheval_result : [ PhEvalResult ], sort_order_str : str , output_dir : Path , tool_result_path : Path , ) -> None : \"\"\" Generate PhEval variant, gene or disease TSV result based on input results. Args: pheval_result ([PhEvalResult]): List of PhEvalResult instances to be processed. sort_order_str (str): String representation of the desired sorting order. output_dir (Path): Path to the output directory. tool_result_path (Path): Path to the tool-specific result file. Raises: ValueError: If the results are not all the same type or an error occurs during file writing. \"\"\" if not pheval_result : info_log . warning ( f \"No results found for { tool_result_path . name } \" ) return ranked_pheval_result = _create_pheval_result ( pheval_result , sort_order_str ) if all ( isinstance ( result , PhEvalGeneResult ) for result in pheval_result ): _write_pheval_gene_result ( ranked_pheval_result , output_dir , tool_result_path ) elif all ( isinstance ( result , PhEvalVariantResult ) for result in pheval_result ): _write_pheval_variant_result ( ranked_pheval_result , output_dir , tool_result_path ) elif all ( isinstance ( result , PhEvalDiseaseResult ) for result in pheval_result ): _write_pheval_disease_result ( ranked_pheval_result , output_dir , tool_result_path ) else : raise ValueError ( \"Results are not all of the same type.\" )","title":"generate_pheval_result()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/","text":"HpoRandomiser Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO). 
Source code in src/pheval/prepare/create_noisy_phenopackets.py 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 class HpoRandomiser : \"\"\"Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO).\"\"\" def __init__ ( self , hpo_ontology : ProntoImplementation , scramble_factor : float ): \"\"\" Initialise the HpoRandomiser. Args: hpo_ontology (ProntoImplementation): The instance of the HPO ontology. scramble_factor (float): A factor for scrambling phenotypic features. \"\"\" self . hpo_ontology = hpo_ontology self . phenotypic_abnormalities = set ( hpo_ontology . roots ( predicates = [ \"HP:0000118\" ])) self . scramble_factor = scramble_factor def scramble_factor_proportions ( self , phenotypic_features : list [ PhenotypicFeature ]) -> int : \"\"\" Calculate the proportion of scrambled HPO terms based on the scramble factor. Args: phenotypic_features (list[PhenotypicFeature]): List of phenotypic features. Returns: int: The calculated number of phenotypic features to be scrambled. \"\"\" if len ( phenotypic_features ) == 1 : return 1 else : return int ( round ( len ( phenotypic_features ) * self . scramble_factor , 0 )) def retrieve_hpo_term ( self , hpo_id : str ) -> PhenotypicFeature : \"\"\" Retrieve an HPO term based on the provided HPO ID. Args: hpo_id (str): The HPO ID of the term to retrieve. Returns: PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term. \"\"\" rels = self . hpo_ontology . 
entity_alias_map ( hpo_id ) hpo_term = \"\" . join ( rels [( list ( rels . keys ())[ 0 ])]) return PhenotypicFeature ( type = OntologyClass ( id = hpo_id , label = hpo_term )) @staticmethod def retain_real_patient_terms ( phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. \"\"\" if len ( phenotypic_features ) > 1 : number_of_real_id = len ( phenotypic_features ) - number_of_scrambled_terms else : number_of_real_id = 1 return random . sample ( phenotypic_features , number_of_real_id ) def convert_patient_terms_to_parent ( self , phenotypic_features : List [ PhenotypicFeature ], retained_phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Convert a subset of patient HPO terms to their respective parent terms. Args: phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features. retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note: This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. 
\"\"\" remaining_hpo = [ i for i in phenotypic_features if i not in retained_phenotypic_features ] if len ( remaining_hpo ) == 0 : number_of_scrambled_terms = 0 hpo_terms_to_be_changed = list ( random . sample ( remaining_hpo , number_of_scrambled_terms )) parent_terms = [] for term in hpo_terms_to_be_changed : if self . hpo_ontology . label ( term . type . id ) . startswith ( \"obsolete\" ): obsolete_term = self . hpo_ontology . entity_metadata_map ( term . type . id ) updated_term = list ( obsolete_term . values ())[ 0 ][ 0 ] parents = self . hpo_ontology . hierarchical_parents ( updated_term ) else : parents = self . hpo_ontology . hierarchical_parents ( term . type . id ) if not parents : parent_terms . append ( term ) else : parent_terms . append ( self . retrieve_hpo_term ( random . choice ( parents ))) return parent_terms def create_random_hpo_terms ( self , number_of_scrambled_terms : int ) -> List [ PhenotypicFeature ]: \"\"\" Generate a list of random HPO terms. Args: number_of_scrambled_terms (int): The count of random HPO terms to be generated. Returns: List[PhenotypicFeature]: A list of randomly selected HPO terms. \"\"\" random_ids = list ( random . sample ( sorted ( self . phenotypic_abnormalities ), number_of_scrambled_terms ) ) return [ self . retrieve_hpo_term ( random_id ) for random_id in random_ids ] def randomise_hpo_terms ( self , phenotypic_features : List [ PhenotypicFeature ], ) -> List [ PhenotypicFeature ]: \"\"\" Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features to be randomised. Returns: List[PhenotypicFeature]: A list of randomised HPO terms. Note: This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. 
Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. \"\"\" number_of_scrambled_terms = self . scramble_factor_proportions ( phenotypic_features ) retained_patient_terms = self . retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) return ( retained_patient_terms + self . convert_patient_terms_to_parent ( phenotypic_features , retained_patient_terms , number_of_scrambled_terms ) + self . create_random_hpo_terms ( number_of_scrambled_terms ) ) __init__ ( hpo_ontology , scramble_factor ) Initialise the HpoRandomiser. Parameters: Name Type Description Default hpo_ontology ProntoImplementation The instance of the HPO ontology. required scramble_factor float A factor for scrambling phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 32 33 34 35 36 37 38 39 40 41 42 def __init__ ( self , hpo_ontology : ProntoImplementation , scramble_factor : float ): \"\"\" Initialise the HpoRandomiser. Args: hpo_ontology (ProntoImplementation): The instance of the HPO ontology. scramble_factor (float): A factor for scrambling phenotypic features. \"\"\" self . hpo_ontology = hpo_ontology self . phenotypic_abnormalities = set ( hpo_ontology . roots ( predicates = [ \"HP:0000118\" ])) self . scramble_factor = scramble_factor convert_patient_terms_to_parent ( phenotypic_features , retained_phenotypic_features , number_of_scrambled_terms ) Convert a subset of patient HPO terms to their respective parent terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of all phenotypic features. required retained_phenotypic_features List [ PhenotypicFeature ] List of retained non-scrambled phenotypic features. 
required number_of_scrambled_terms int The count of scrambled HPO terms. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. Source code in src/pheval/prepare/create_noisy_phenopackets.py 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 def convert_patient_terms_to_parent ( self , phenotypic_features : List [ PhenotypicFeature ], retained_phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Convert a subset of patient HPO terms to their respective parent terms. Args: phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features. retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note: This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. \"\"\" remaining_hpo = [ i for i in phenotypic_features if i not in retained_phenotypic_features ] if len ( remaining_hpo ) == 0 : number_of_scrambled_terms = 0 hpo_terms_to_be_changed = list ( random . 
sample ( remaining_hpo , number_of_scrambled_terms )) parent_terms = [] for term in hpo_terms_to_be_changed : if self . hpo_ontology . label ( term . type . id ) . startswith ( \"obsolete\" ): obsolete_term = self . hpo_ontology . entity_metadata_map ( term . type . id ) updated_term = list ( obsolete_term . values ())[ 0 ][ 0 ] parents = self . hpo_ontology . hierarchical_parents ( updated_term ) else : parents = self . hpo_ontology . hierarchical_parents ( term . type . id ) if not parents : parent_terms . append ( term ) else : parent_terms . append ( self . retrieve_hpo_term ( random . choice ( parents ))) return parent_terms create_random_hpo_terms ( number_of_scrambled_terms ) Generate a list of random HPO terms. Parameters: Name Type Description Default number_of_scrambled_terms int The count of random HPO terms to be generated. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of randomly selected HPO terms. Source code in src/pheval/prepare/create_noisy_phenopackets.py 135 136 137 138 139 140 141 142 143 144 145 146 147 148 def create_random_hpo_terms ( self , number_of_scrambled_terms : int ) -> List [ PhenotypicFeature ]: \"\"\" Generate a list of random HPO terms. Args: number_of_scrambled_terms (int): The count of random HPO terms to be generated. Returns: List[PhenotypicFeature]: A list of randomly selected HPO terms. \"\"\" random_ids = list ( random . sample ( sorted ( self . phenotypic_abnormalities ), number_of_scrambled_terms ) ) return [ self . retrieve_hpo_term ( random_id ) for random_id in random_ids ] randomise_hpo_terms ( phenotypic_features ) Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of phenotypic features to be randomised. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of randomised HPO terms. 
Note This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. Source code in src/pheval/prepare/create_noisy_phenopackets.py 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 def randomise_hpo_terms ( self , phenotypic_features : List [ PhenotypicFeature ], ) -> List [ PhenotypicFeature ]: \"\"\" Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features to be randomised. Returns: List[PhenotypicFeature]: A list of randomised HPO terms. Note: This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. \"\"\" number_of_scrambled_terms = self . scramble_factor_proportions ( phenotypic_features ) retained_patient_terms = self . retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) return ( retained_patient_terms + self . 
convert_patient_terms_to_parent ( phenotypic_features , retained_patient_terms , number_of_scrambled_terms ) + self . create_random_hpo_terms ( number_of_scrambled_terms ) ) retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) staticmethod Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of phenotypic features. required number_of_scrambled_terms int The count of scrambled HPO terms. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. Source code in src/pheval/prepare/create_noisy_phenopackets.py 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 @staticmethod def retain_real_patient_terms ( phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. \"\"\" if len ( phenotypic_features ) > 1 : number_of_real_id = len ( phenotypic_features ) - number_of_scrambled_terms else : number_of_real_id = 1 return random . sample ( phenotypic_features , number_of_real_id ) retrieve_hpo_term ( hpo_id ) Retrieve an HPO term based on the provided HPO ID. Parameters: Name Type Description Default hpo_id str The HPO ID of the term to retrieve. required Returns: Name Type Description PhenotypicFeature PhenotypicFeature The PhenotypicFeature object representing the retrieved HPO term. 
Source code in src/pheval/prepare/create_noisy_phenopackets.py 59 60 61 62 63 64 65 66 67 68 69 70 71 def retrieve_hpo_term ( self , hpo_id : str ) -> PhenotypicFeature : \"\"\" Retrieve an HPO term based on the provided HPO ID. Args: hpo_id (str): The HPO ID of the term to retrieve. Returns: PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term. \"\"\" rels = self . hpo_ontology . entity_alias_map ( hpo_id ) hpo_term = \"\" . join ( rels [( list ( rels . keys ())[ 0 ])]) return PhenotypicFeature ( type = OntologyClass ( id = hpo_id , label = hpo_term )) scramble_factor_proportions ( phenotypic_features ) Calculate the proportion of scrambled HPO terms based on the scramble factor. Parameters: Name Type Description Default phenotypic_features list [ PhenotypicFeature ] List of phenotypic features. required Returns: Name Type Description int int The calculated number of phenotypic features to be scrambled. Source code in src/pheval/prepare/create_noisy_phenopackets.py 44 45 46 47 48 49 50 51 52 53 54 55 56 57 def scramble_factor_proportions ( self , phenotypic_features : list [ PhenotypicFeature ]) -> int : \"\"\" Calculate the proportion of scrambled HPO terms based on the scramble factor. Args: phenotypic_features (list[PhenotypicFeature]): List of phenotypic features. Returns: int: The calculated number of phenotypic features to be scrambled. \"\"\" if len ( phenotypic_features ) == 1 : return 1 else : return int ( round ( len ( phenotypic_features ) * self . scramble_factor , 0 )) add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket ) Randomise the phenotypic profile of a Phenopacket or Family. Parameters: Name Type Description Default hpo_randomiser HpoRandomiser An instance of HpoRandomiser used for randomisation. required phenopacket Union [ Phenopacket , Family ] The Phenopacket or Family to be randomised. 
required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The randomised Phenopacket or Family. Source code in src/pheval/prepare/create_noisy_phenopackets.py 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 def add_noise_to_phenotypic_profile ( hpo_randomiser : HpoRandomiser , phenopacket : Union [ Phenopacket , Family ], ) -> Union [ Phenopacket , Family ]: \"\"\" Randomise the phenotypic profile of a Phenopacket or Family. Args: hpo_randomiser (HpoRandomiser): An instance of HpoRandomiser used for randomisation. phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family to be randomised. Returns: Union[Phenopacket, Family]: The randomised Phenopacket or Family. \"\"\" phenotypic_features = PhenopacketUtil ( phenopacket ) . observed_phenotypic_features () random_phenotypes = hpo_randomiser . randomise_hpo_terms ( phenotypic_features ) randomised_phenopacket = PhenopacketRebuilder ( phenopacket ) . add_randomised_hpo ( random_phenotypes ) return randomised_phenopacket create_scrambled_phenopacket ( output_dir , phenopacket_path , scramble_factor ) Create a scrambled version of a Phenopacket. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopacket. required phenopacket_path Path The path to the original Phenopacket file. required scramble_factor float A factor determining the level of scrambling for phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 def create_scrambled_phenopacket ( output_dir : Path , phenopacket_path : Path , scramble_factor : float ) -> None : \"\"\" Create a scrambled version of a Phenopacket. Args: output_dir (Path): The directory to store the output scrambled Phenopacket. phenopacket_path (Path): The path to the original Phenopacket file. 
scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" ontology = load_ontology () hpo_randomiser = HpoRandomiser ( ontology , scramble_factor ) phenopacket = phenopacket_reader ( phenopacket_path ) created_noisy_phenopacket = add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket , ) write_phenopacket ( created_noisy_phenopacket , output_dir . joinpath ( phenopacket_path . name ), ) create_scrambled_phenopackets ( output_dir , phenopacket_dir , scramble_factor ) Create scrambled versions of Phenopackets within a directory. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopackets. required phenopacket_dir Path The directory containing the original Phenopacket files. required scramble_factor float A factor determining the level of scrambling for phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 def create_scrambled_phenopackets ( output_dir : Path , phenopacket_dir : Path , scramble_factor : float ) -> None : \"\"\" Create scrambled versions of Phenopackets within a directory. Args: output_dir (Path): The directory to store the output scrambled Phenopackets. phenopacket_dir (Path): The directory containing the original Phenopacket files. scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" ontology = load_ontology () hpo_randomiser = HpoRandomiser ( ontology , scramble_factor ) phenopacket_files = files_with_suffix ( phenopacket_dir , \".json\" ) for phenopacket_path in phenopacket_files : phenopacket = phenopacket_reader ( phenopacket_path ) created_noisy_phenopacket = add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket ) write_phenopacket ( created_noisy_phenopacket , output_dir . joinpath ( phenopacket_path . 
name , ), ) load_ontology () Load the Human Phenotype Ontology (HPO). Returns: Name Type Description ProntoImplementation An instance of ProntoImplementation containing the loaded HPO. Source code in src/pheval/prepare/create_noisy_phenopackets.py 18 19 20 21 22 23 24 25 26 def load_ontology (): \"\"\" Load the Human Phenotype Ontology (HPO). Returns: ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO. \"\"\" resource = OntologyResource ( slug = \"hp.obo\" , local = False ) return ProntoImplementation ( resource ) scramble_phenopackets ( output_dir , phenopacket_path , phenopacket_dir , scramble_factor ) Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopackets. required phenopacket_path Path The path to a single Phenopacket file (if applicable). required phenopacket_dir Path The directory containing multiple Phenopacket files (if applicable). required scramble_factor float A factor determining the level of scrambling for phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 def scramble_phenopackets ( output_dir : Path , phenopacket_path : Path , phenopacket_dir : Path , scramble_factor : float ) -> None : \"\"\" Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets. Args: output_dir (Path): The directory to store the output scrambled Phenopackets. phenopacket_path (Path): The path to a single Phenopacket file (if applicable). phenopacket_dir (Path): The directory containing multiple Phenopacket files (if applicable). scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" output_dir . 
mkdir ( exist_ok = True ) if phenopacket_path is not None : create_scrambled_phenopacket ( output_dir , phenopacket_path , scramble_factor ) elif phenopacket_dir is not None : create_scrambled_phenopackets ( output_dir , phenopacket_dir , scramble_factor )","title":"Create noisy phenopackets"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser","text":"Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO). Source code in src/pheval/prepare/create_noisy_phenopackets.py 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 class HpoRandomiser : \"\"\"Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO).\"\"\" def __init__ ( self , hpo_ontology : ProntoImplementation , scramble_factor : float ): \"\"\" Initialise the HpoRandomiser. Args: hpo_ontology (ProntoImplementation): The instance of the HPO ontology. scramble_factor (float): A factor for scrambling phenotypic features. \"\"\" self . hpo_ontology = hpo_ontology self . phenotypic_abnormalities = set ( hpo_ontology . roots ( predicates = [ \"HP:0000118\" ])) self . scramble_factor = scramble_factor def scramble_factor_proportions ( self , phenotypic_features : list [ PhenotypicFeature ]) -> int : \"\"\" Calculate the proportion of scrambled HPO terms based on the scramble factor. Args: phenotypic_features (list[PhenotypicFeature]): List of phenotypic features. 
Returns: int: The calculated number of phenotypic features to be scrambled. \"\"\" if len ( phenotypic_features ) == 1 : return 1 else : return int ( round ( len ( phenotypic_features ) * self . scramble_factor , 0 )) def retrieve_hpo_term ( self , hpo_id : str ) -> PhenotypicFeature : \"\"\" Retrieve an HPO term based on the provided HPO ID. Args: hpo_id (str): The HPO ID of the term to retrieve. Returns: PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term. \"\"\" rels = self . hpo_ontology . entity_alias_map ( hpo_id ) hpo_term = \"\" . join ( rels [( list ( rels . keys ())[ 0 ])]) return PhenotypicFeature ( type = OntologyClass ( id = hpo_id , label = hpo_term )) @staticmethod def retain_real_patient_terms ( phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. \"\"\" if len ( phenotypic_features ) > 1 : number_of_real_id = len ( phenotypic_features ) - number_of_scrambled_terms else : number_of_real_id = 1 return random . sample ( phenotypic_features , number_of_real_id ) def convert_patient_terms_to_parent ( self , phenotypic_features : List [ PhenotypicFeature ], retained_phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Convert a subset of patient HPO terms to their respective parent terms. Args: phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features. retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. 
Returns: List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note: This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. \"\"\" remaining_hpo = [ i for i in phenotypic_features if i not in retained_phenotypic_features ] if len ( remaining_hpo ) == 0 : number_of_scrambled_terms = 0 hpo_terms_to_be_changed = list ( random . sample ( remaining_hpo , number_of_scrambled_terms )) parent_terms = [] for term in hpo_terms_to_be_changed : if self . hpo_ontology . label ( term . type . id ) . startswith ( \"obsolete\" ): obsolete_term = self . hpo_ontology . entity_metadata_map ( term . type . id ) updated_term = list ( obsolete_term . values ())[ 0 ][ 0 ] parents = self . hpo_ontology . hierarchical_parents ( updated_term ) else : parents = self . hpo_ontology . hierarchical_parents ( term . type . id ) if not parents : parent_terms . append ( term ) else : parent_terms . append ( self . retrieve_hpo_term ( random . choice ( parents ))) return parent_terms def create_random_hpo_terms ( self , number_of_scrambled_terms : int ) -> List [ PhenotypicFeature ]: \"\"\" Generate a list of random HPO terms. Args: number_of_scrambled_terms (int): The count of random HPO terms to be generated. Returns: List[PhenotypicFeature]: A list of randomly selected HPO terms. \"\"\" random_ids = list ( random . sample ( sorted ( self . phenotypic_abnormalities ), number_of_scrambled_terms ) ) return [ self . 
retrieve_hpo_term ( random_id ) for random_id in random_ids ] def randomise_hpo_terms ( self , phenotypic_features : List [ PhenotypicFeature ], ) -> List [ PhenotypicFeature ]: \"\"\" Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features to be randomised. Returns: List[PhenotypicFeature]: A list of randomised HPO terms. Note: This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. \"\"\" number_of_scrambled_terms = self . scramble_factor_proportions ( phenotypic_features ) retained_patient_terms = self . retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) return ( retained_patient_terms + self . convert_patient_terms_to_parent ( phenotypic_features , retained_patient_terms , number_of_scrambled_terms ) + self . create_random_hpo_terms ( number_of_scrambled_terms ) )","title":"HpoRandomiser"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__","text":"Initialise the HpoRandomiser. Parameters: Name Type Description Default hpo_ontology ProntoImplementation The instance of the HPO ontology. required scramble_factor float A factor for scrambling phenotypic features. 
required Source code in src/pheval/prepare/create_noisy_phenopackets.py 32 33 34 35 36 37 38 39 40 41 42 def __init__ ( self , hpo_ontology : ProntoImplementation , scramble_factor : float ): \"\"\" Initialise the HpoRandomiser. Args: hpo_ontology (ProntoImplementation): The instance of the HPO ontology. scramble_factor (float): A factor for scrambling phenotypic features. \"\"\" self . hpo_ontology = hpo_ontology self . phenotypic_abnormalities = set ( hpo_ontology . roots ( predicates = [ \"HP:0000118\" ])) self . scramble_factor = scramble_factor","title":"__init__()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patient_terms_to_parent","text":"Convert a subset of patient HPO terms to their respective parent terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of all phenotypic features. required retained_phenotypic_features List [ PhenotypicFeature ] List of retained non-scrambled phenotypic features. required number_of_scrambled_terms int The count of scrambled HPO terms. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. 
Source code in src/pheval/prepare/create_noisy_phenopackets.py 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 def convert_patient_terms_to_parent ( self , phenotypic_features : List [ PhenotypicFeature ], retained_phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Convert a subset of patient HPO terms to their respective parent terms. Args: phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features. retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note: This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. \"\"\" remaining_hpo = [ i for i in phenotypic_features if i not in retained_phenotypic_features ] if len ( remaining_hpo ) == 0 : number_of_scrambled_terms = 0 hpo_terms_to_be_changed = list ( random . sample ( remaining_hpo , number_of_scrambled_terms )) parent_terms = [] for term in hpo_terms_to_be_changed : if self . hpo_ontology . label ( term . type . id ) . startswith ( \"obsolete\" ): obsolete_term = self . hpo_ontology . entity_metadata_map ( term . type . id ) updated_term = list ( obsolete_term . values ())[ 0 ][ 0 ] parents = self . hpo_ontology . hierarchical_parents ( updated_term ) else : parents = self . hpo_ontology . hierarchical_parents ( term . type . id ) if not parents : parent_terms . append ( term ) else : parent_terms . append ( self . 
retrieve_hpo_term ( random . choice ( parents ))) return parent_terms","title":"convert_patient_terms_to_parent()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random_hpo_terms","text":"Generate a list of random HPO terms. Parameters: Name Type Description Default number_of_scrambled_terms int The count of random HPO terms to be generated. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of randomly selected HPO terms. Source code in src/pheval/prepare/create_noisy_phenopackets.py 135 136 137 138 139 140 141 142 143 144 145 146 147 148 def create_random_hpo_terms ( self , number_of_scrambled_terms : int ) -> List [ PhenotypicFeature ]: \"\"\" Generate a list of random HPO terms. Args: number_of_scrambled_terms (int): The count of random HPO terms to be generated. Returns: List[PhenotypicFeature]: A list of randomly selected HPO terms. \"\"\" random_ids = list ( random . sample ( sorted ( self . phenotypic_abnormalities ), number_of_scrambled_terms ) ) return [ self . retrieve_hpo_term ( random_id ) for random_id in random_ids ]","title":"create_random_hpo_terms()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo_terms","text":"Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of phenotypic features to be randomised. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of randomised HPO terms. Note This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. 
Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. Source code in src/pheval/prepare/create_noisy_phenopackets.py 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 def randomise_hpo_terms ( self , phenotypic_features : List [ PhenotypicFeature ], ) -> List [ PhenotypicFeature ]: \"\"\" Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features to be randomised. Returns: List[PhenotypicFeature]: A list of randomised HPO terms. Note: This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. \"\"\" number_of_scrambled_terms = self . scramble_factor_proportions ( phenotypic_features ) retained_patient_terms = self . retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) return ( retained_patient_terms + self . convert_patient_terms_to_parent ( phenotypic_features , retained_patient_terms , number_of_scrambled_terms ) + self . 
create_random_hpo_terms ( number_of_scrambled_terms ) )","title":"randomise_hpo_terms()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_patient_terms","text":"Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of phenotypic features. required number_of_scrambled_terms int The count of scrambled HPO terms. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. Source code in src/pheval/prepare/create_noisy_phenopackets.py 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 @staticmethod def retain_real_patient_terms ( phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. \"\"\" if len ( phenotypic_features ) > 1 : number_of_real_id = len ( phenotypic_features ) - number_of_scrambled_terms else : number_of_real_id = 1 return random . sample ( phenotypic_features , number_of_real_id )","title":"retain_real_patient_terms()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_term","text":"Retrieve an HPO term based on the provided HPO ID. Parameters: Name Type Description Default hpo_id str The HPO ID of the term to retrieve. required Returns: Name Type Description PhenotypicFeature PhenotypicFeature The PhenotypicFeature object representing the retrieved HPO term. 
Source code in src/pheval/prepare/create_noisy_phenopackets.py 59 60 61 62 63 64 65 66 67 68 69 70 71 def retrieve_hpo_term ( self , hpo_id : str ) -> PhenotypicFeature : \"\"\" Retrieve an HPO term based on the provided HPO ID. Args: hpo_id (str): The HPO ID of the term to retrieve. Returns: PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term. \"\"\" rels = self . hpo_ontology . entity_alias_map ( hpo_id ) hpo_term = \"\" . join ( rels [( list ( rels . keys ())[ 0 ])]) return PhenotypicFeature ( type = OntologyClass ( id = hpo_id , label = hpo_term ))","title":"retrieve_hpo_term()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_factor_proportions","text":"Calculate the proportion of scrambled HPO terms based on the scramble factor. Parameters: Name Type Description Default phenotypic_features list [ PhenotypicFeature ] List of phenotypic features. required Returns: Name Type Description int int The calculated number of phenotypic features to be scrambled. Source code in src/pheval/prepare/create_noisy_phenopackets.py 44 45 46 47 48 49 50 51 52 53 54 55 56 57 def scramble_factor_proportions ( self , phenotypic_features : list [ PhenotypicFeature ]) -> int : \"\"\" Calculate the proportion of scrambled HPO terms based on the scramble factor. Args: phenotypic_features (list[PhenotypicFeature]): List of phenotypic features. Returns: int: The calculated number of phenotypic features to be scrambled. \"\"\" if len ( phenotypic_features ) == 1 : return 1 else : return int ( round ( len ( phenotypic_features ) * self . scramble_factor , 0 ))","title":"scramble_factor_proportions()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_profile","text":"Randomise the phenotypic profile of a Phenopacket or Family. 
Parameters: Name Type Description Default hpo_randomiser HpoRandomiser An instance of HpoRandomiser used for randomisation. required phenopacket Union [ Phenopacket , Family ] The Phenopacket or Family to be randomised. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The randomised Phenopacket or Family. Source code in src/pheval/prepare/create_noisy_phenopackets.py 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 def add_noise_to_phenotypic_profile ( hpo_randomiser : HpoRandomiser , phenopacket : Union [ Phenopacket , Family ], ) -> Union [ Phenopacket , Family ]: \"\"\" Randomise the phenotypic profile of a Phenopacket or Family. Args: hpo_randomiser (HpoRandomiser): An instance of HpoRandomiser used for randomisation. phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family to be randomised. Returns: Union[Phenopacket, Family]: The randomised Phenopacket or Family. \"\"\" phenotypic_features = PhenopacketUtil ( phenopacket ) . observed_phenotypic_features () random_phenotypes = hpo_randomiser . randomise_hpo_terms ( phenotypic_features ) randomised_phenopacket = PhenopacketRebuilder ( phenopacket ) . add_randomised_hpo ( random_phenotypes ) return randomised_phenopacket","title":"add_noise_to_phenotypic_profile()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacket","text":"Create a scrambled version of a Phenopacket. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopacket. required phenopacket_path Path The path to the original Phenopacket file. required scramble_factor float A factor determining the level of scrambling for phenotypic features. 
required Source code in src/pheval/prepare/create_noisy_phenopackets.py 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 def create_scrambled_phenopacket ( output_dir : Path , phenopacket_path : Path , scramble_factor : float ) -> None : \"\"\" Create a scrambled version of a Phenopacket. Args: output_dir (Path): The directory to store the output scrambled Phenopacket. phenopacket_path (Path): The path to the original Phenopacket file. scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" ontology = load_ontology () hpo_randomiser = HpoRandomiser ( ontology , scramble_factor ) phenopacket = phenopacket_reader ( phenopacket_path ) created_noisy_phenopacket = add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket , ) write_phenopacket ( created_noisy_phenopacket , output_dir . joinpath ( phenopacket_path . name ), )","title":"create_scrambled_phenopacket()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopackets","text":"Create scrambled versions of Phenopackets within a directory. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopackets. required phenopacket_dir Path The directory containing the original Phenopacket files. required scramble_factor float A factor determining the level of scrambling for phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 def create_scrambled_phenopackets ( output_dir : Path , phenopacket_dir : Path , scramble_factor : float ) -> None : \"\"\" Create scrambled versions of Phenopackets within a directory. Args: output_dir (Path): The directory to store the output scrambled Phenopackets. phenopacket_dir (Path): The directory containing the original Phenopacket files. 
scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" ontology = load_ontology () hpo_randomiser = HpoRandomiser ( ontology , scramble_factor ) phenopacket_files = files_with_suffix ( phenopacket_dir , \".json\" ) for phenopacket_path in phenopacket_files : phenopacket = phenopacket_reader ( phenopacket_path ) created_noisy_phenopacket = add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket ) write_phenopacket ( created_noisy_phenopacket , output_dir . joinpath ( phenopacket_path . name , ), )","title":"create_scrambled_phenopackets()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.load_ontology","text":"Load the Human Phenotype Ontology (HPO). Returns: Name Type Description ProntoImplementation An instance of ProntoImplementation containing the loaded HPO. Source code in src/pheval/prepare/create_noisy_phenopackets.py 18 19 20 21 22 23 24 25 26 def load_ontology (): \"\"\" Load the Human Phenotype Ontology (HPO). Returns: ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO. \"\"\" resource = OntologyResource ( slug = \"hp.obo\" , local = False ) return ProntoImplementation ( resource )","title":"load_ontology()"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets","text":"Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopackets. required phenopacket_path Path The path to a single Phenopacket file (if applicable). required phenopacket_dir Path The directory containing multiple Phenopacket files (if applicable). required scramble_factor float A factor determining the level of scrambling for phenotypic features. 
required Source code in src/pheval/prepare/create_noisy_phenopackets.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 def scramble_phenopackets ( output_dir : Path , phenopacket_path : Path , phenopacket_dir : Path , scramble_factor : float ) -> None : \"\"\" Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets. Args: output_dir (Path): The directory to store the output scrambled Phenopackets. phenopacket_path (Path): The path to a single Phenopacket file (if applicable). phenopacket_dir (Path): The directory containing multiple Phenopacket files (if applicable). scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" output_dir . mkdir ( exist_ok = True ) if phenopacket_path is not None : create_scrambled_phenopacket ( output_dir , phenopacket_path , scramble_factor ) elif phenopacket_dir is not None : create_scrambled_phenopackets ( output_dir , phenopacket_dir , scramble_factor )","title":"scramble_phenopackets()"},{"location":"api/pheval/prepare/create_spiked_vcf/","text":"VcfFile dataclass Represents a VCF file with its name, contents, and header information. Attributes: Name Type Description vcf_file_name str The name of the VCF file. vcf_contents List [ str ] The contents of the VCF file. vcf_header VcfHeader The parsed header information of the VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 @dataclass class VcfFile : \"\"\" Represents a VCF file with its name, contents, and header information. Attributes: vcf_file_name (str): The name of the VCF file. vcf_contents (List[str]): The contents of the VCF file. vcf_header (VcfHeader): The parsed header information of the VCF file. 
\"\"\" vcf_file_name : str = None vcf_contents : List [ str ] = None vcf_header : VcfHeader = None @staticmethod def populate_fields ( template_vcf : Path ): \"\"\" Populate the fields of the VcfFile instance using the contents of a template VCF file. Args: template_vcf (Path): The path to the template VCF file. Returns: VcfFile: An instance of VcfFile with populated fields. \"\"\" contents = read_vcf ( template_vcf ) return VcfFile ( template_vcf . name , contents , VcfHeaderParser ( contents ) . parse_vcf_header ()) populate_fields ( template_vcf ) staticmethod Populate the fields of the VcfFile instance using the contents of a template VCF file. Parameters: Name Type Description Default template_vcf Path The path to the template VCF file. required Returns: Name Type Description VcfFile An instance of VcfFile with populated fields. Source code in src/pheval/prepare/create_spiked_vcf.py 190 191 192 193 194 195 196 197 198 199 200 201 202 203 @staticmethod def populate_fields ( template_vcf : Path ): \"\"\" Populate the fields of the VcfFile instance using the contents of a template VCF file. Args: template_vcf (Path): The path to the template VCF file. Returns: VcfFile: An instance of VcfFile with populated fields. \"\"\" contents = read_vcf ( template_vcf ) return VcfFile ( template_vcf . name , contents , VcfHeaderParser ( contents ) . parse_vcf_header ()) VcfHeader dataclass Data obtained from VCF header. Parameters: Name Type Description Default sample_id str The sample identifier from the VCF header. required assembly str The assembly information obtained from the VCF header. required chr_status bool A boolean indicating whether the VCF denotes chromosomes as chr or not. required Source code in src/pheval/prepare/create_spiked_vcf.py 78 79 80 81 82 83 84 85 86 87 88 89 90 @dataclass class VcfHeader : \"\"\"Data obtained from VCF header. Args: sample_id (str): The sample identifier from the VCF header. 
assembly (str): The assembly information obtained from the VCF header. chr_status (bool): A boolean indicating whether the VCF denotes chromosomes as chr or not. \"\"\" sample_id : str assembly : str chr_status : bool VcfHeaderParser Class for parsing the header of a VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 class VcfHeaderParser : \"\"\"Class for parsing the header of a VCF file.\"\"\" def __init__ ( self , vcf_contents : list [ str ]): \"\"\" Initialise the VcfHeaderParser. Args: vcf_contents (list[str]): The contents of the VCF file as a list of strings. \"\"\" self . vcf_contents = vcf_contents def parse_assembly ( self ) -> tuple [ str , bool ]: \"\"\" Parse the genome assembly and format of vcf_records. Returns: Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). \"\"\" vcf_assembly = {} chr_status = False for line in self . vcf_contents : if line . startswith ( \"##contig=<ID\" ): tokens = line . split ( \",\" ) chromosome = re . sub ( r \"^.*?ID=\" , \"\" , [ token for token in tokens if \"ID=\" in token ][ 0 ] ) if \"chr\" in chromosome : chr_status = True chromosome = chromosome . replace ( \"chr\" , \"\" ) contig_length = re . sub ( \"[^0-9]+\" , \"\" , [ token for token in tokens if \"length=\" in token ][ 0 ], ) vcf_assembly [ chromosome ] = int ( contig_length ) vcf_assembly = { i : vcf_assembly [ i ] for i in vcf_assembly if i . isdigit ()} assembly = [ k for k , v in genome_assemblies . items () if v == vcf_assembly ][ 0 ] return assembly , chr_status def parse_sample_id ( self ) -> str : \"\"\" Parse the sample ID of the VCF. Returns: str: The sample ID extracted from the VCF header. \"\"\" for line in self . vcf_contents : if line . 
startswith ( \"#CHROM\" ): return line . split ( \" \\t \" )[ 9 ] . rstrip () def parse_vcf_header ( self ) -> VcfHeader : \"\"\" Parse the header of the VCF. Returns: VcfHeader: An instance of VcfHeader containing sample ID, assembly, and chromosome status. \"\"\" assembly , chr_status = self . parse_assembly () sample_id = self . parse_sample_id () return VcfHeader ( sample_id , assembly , chr_status ) __init__ ( vcf_contents ) Initialise the VcfHeaderParser. Parameters: Name Type Description Default vcf_contents list [ str ] The contents of the VCF file as a list of strings. required Source code in src/pheval/prepare/create_spiked_vcf.py 115 116 117 118 119 120 121 122 def __init__ ( self , vcf_contents : list [ str ]): \"\"\" Initialise the VcfHeaderParser. Args: vcf_contents (list[str]): The contents of the VCF file as a list of strings. \"\"\" self . vcf_contents = vcf_contents parse_assembly () Parse the genome assembly and format of vcf_records. Returns: Type Description tuple [ str , bool ] Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). Source code in src/pheval/prepare/create_spiked_vcf.py 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 def parse_assembly ( self ) -> tuple [ str , bool ]: \"\"\" Parse the genome assembly and format of vcf_records. Returns: Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). \"\"\" vcf_assembly = {} chr_status = False for line in self . vcf_contents : if line . startswith ( \"##contig=<ID\" ): tokens = line . split ( \",\" ) chromosome = re . sub ( r \"^.*?ID=\" , \"\" , [ token for token in tokens if \"ID=\" in token ][ 0 ] ) if \"chr\" in chromosome : chr_status = True chromosome = chromosome . replace ( \"chr\" , \"\" ) contig_length = re . 
sub ( \"[^0-9]+\" , \"\" , [ token for token in tokens if \"length=\" in token ][ 0 ], ) vcf_assembly [ chromosome ] = int ( contig_length ) vcf_assembly = { i : vcf_assembly [ i ] for i in vcf_assembly if i . isdigit ()} assembly = [ k for k , v in genome_assemblies . items () if v == vcf_assembly ][ 0 ] return assembly , chr_status parse_sample_id () Parse the sample ID of the VCF. Returns: Name Type Description str str The sample ID extracted from the VCF header. Source code in src/pheval/prepare/create_spiked_vcf.py 152 153 154 155 156 157 158 159 160 161 def parse_sample_id ( self ) -> str : \"\"\" Parse the sample ID of the VCF. Returns: str: The sample ID extracted from the VCF header. \"\"\" for line in self . vcf_contents : if line . startswith ( \"#CHROM\" ): return line . split ( \" \\t \" )[ 9 ] . rstrip () parse_vcf_header () Parse the header of the VCF. Returns: Name Type Description VcfHeader VcfHeader An instance of VcfHeader containing sample ID, assembly, and chromosome status. Source code in src/pheval/prepare/create_spiked_vcf.py 163 164 165 166 167 168 169 170 171 172 def parse_vcf_header ( self ) -> VcfHeader : \"\"\" Parse the header of the VCF. Returns: VcfHeader: An instance of VcfHeader containing sample ID, assembly, and chromosome status. \"\"\" assembly , chr_status = self . parse_assembly () sample_id = self . parse_sample_id () return VcfHeader ( sample_id , assembly , chr_status ) VcfSpiker Class for spiking proband variants into template VCF file contents. 
Source code in src/pheval/prepare/create_spiked_vcf.py 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 class VcfSpiker : \"\"\"Class for spiking proband variants into template VCF file contents.\"\"\" def __init__ ( self , vcf_contents : list [ str ], proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , ): \"\"\" Initialise the VcfSpiker. Args: vcf_contents (List[str]): Contents of the template VCF file. proband_causative_variants (List[ProbandCausativeVariant]): List of proband causative variants. vcf_header (VcfHeader): The VCF header information. \"\"\" self . vcf_contents = vcf_contents self . proband_causative_variants = proband_causative_variants self . vcf_header = vcf_header def construct_variant_entry ( self , proband_variant_data : ProbandCausativeVariant ) -> List [ str ]: \"\"\" Construct variant entries. Args: proband_variant_data (ProbandCausativeVariant): Data for the proband variant. Returns: List[str]: Constructed variant entry as a list of strings. \"\"\" genotype_codes = { \"hemizygous\" : \"0/1\" , \"homozygous\" : \"1/1\" , \"heterozygous\" : \"0/1\" , \"compound heterozygous\" : \"0/1\" , } if self . vcf_header . chr_status is True and \"chr\" not in proband_variant_data . variant . chrom : proband_variant_data . variant . chrom = \"chr\" + proband_variant_data . variant . chrom return [ proband_variant_data . variant . chrom , str ( proband_variant_data . variant . pos ), \".\" , proband_variant_data . variant . ref , ( f \"< { proband_variant_data . variant . 
alt } >\" if proband_variant_data . variant . ref == \"N\" else proband_variant_data . variant . alt ), \"100\" , \"PASS\" , proband_variant_data . info if proband_variant_data . info else \".\" , \"GT\" , genotype_codes [ proband_variant_data . genotype . lower ()] + \" \\n \" , ] def construct_vcf_records ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: Updated VCF records containing the spiked variants. \"\"\" updated_vcf_records = copy ( self . vcf_contents ) for variant in self . proband_causative_variants : variant_entry = self . construct_variant_entry ( variant ) matching_indices = [ i for i , val in enumerate ( updated_vcf_records ) if val . split ( \" \\t \" )[ 0 ] == variant_entry [ 0 ] and int ( val . split ( \" \\t \" )[ 1 ]) < int ( variant_entry [ 1 ]) ] if matching_indices : variant_entry_position = matching_indices [ - 1 ] + 1 else : info_log . warning ( f \"Could not find entry position for { variant . variant . chrom } - { variant . variant . pos } -\" f \" { variant . variant . ref } - { variant . variant . alt } in { template_vcf_name } , \" \"inserting at end of VCF contents.\" ) variant_entry_position = len ( updated_vcf_records ) updated_vcf_records . insert ( variant_entry_position , \" \\t \" . join ( variant_entry )) return updated_vcf_records def construct_header ( self , updated_vcf_records : List [ str ]) -> List [ str ]: \"\"\" Construct the header of the VCF. Args: updated_vcf_records (List[str]): Updated VCF records. Returns: List[str]: Constructed header as a list of strings. \"\"\" updated_vcf_file = [] for line in updated_vcf_records : if line . startswith ( \"#\" ): text = line . replace ( self . vcf_header . sample_id , self . proband_causative_variants [ 0 ] . proband_id , ) else : text = line updated_vcf_file . 
append ( text ) return updated_vcf_file def construct_vcf ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: The complete spiked VCF file content as a list of strings. \"\"\" return self . construct_header ( self . construct_vcf_records ( template_vcf_name )) __init__ ( vcf_contents , proband_causative_variants , vcf_header ) Initialise the VcfSpiker. Parameters: Name Type Description Default vcf_contents List [ str ] Contents of the template VCF file. required proband_causative_variants List [ ProbandCausativeVariant ] List of proband causative variants. required vcf_header VcfHeader The VCF header information. required Source code in src/pheval/prepare/create_spiked_vcf.py 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 def __init__ ( self , vcf_contents : list [ str ], proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , ): \"\"\" Initialise the VcfSpiker. Args: vcf_contents (List[str]): Contents of the template VCF file. proband_causative_variants (List[ProbandCausativeVariant]): List of proband causative variants. vcf_header (VcfHeader): The VCF header information. \"\"\" self . vcf_contents = vcf_contents self . proband_causative_variants = proband_causative_variants self . vcf_header = vcf_header construct_header ( updated_vcf_records ) Construct the header of the VCF. Parameters: Name Type Description Default updated_vcf_records List [ str ] Updated VCF records. required Returns: Type Description List [ str ] List[str]: Constructed header as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 def construct_header ( self , updated_vcf_records : List [ str ]) -> List [ str ]: \"\"\" Construct the header of the VCF. 
Args: updated_vcf_records (List[str]): Updated VCF records. Returns: List[str]: Constructed header as a list of strings. \"\"\" updated_vcf_file = [] for line in updated_vcf_records : if line . startswith ( \"#\" ): text = line . replace ( self . vcf_header . sample_id , self . proband_causative_variants [ 0 ] . proband_id , ) else : text = line updated_vcf_file . append ( text ) return updated_vcf_file construct_variant_entry ( proband_variant_data ) Construct variant entries. Parameters: Name Type Description Default proband_variant_data ProbandCausativeVariant Data for the proband variant. required Returns: Type Description List [ str ] List[str]: Constructed variant entry as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 def construct_variant_entry ( self , proband_variant_data : ProbandCausativeVariant ) -> List [ str ]: \"\"\" Construct variant entries. Args: proband_variant_data (ProbandCausativeVariant): Data for the proband variant. Returns: List[str]: Constructed variant entry as a list of strings. \"\"\" genotype_codes = { \"hemizygous\" : \"0/1\" , \"homozygous\" : \"1/1\" , \"heterozygous\" : \"0/1\" , \"compound heterozygous\" : \"0/1\" , } if self . vcf_header . chr_status is True and \"chr\" not in proband_variant_data . variant . chrom : proband_variant_data . variant . chrom = \"chr\" + proband_variant_data . variant . chrom return [ proband_variant_data . variant . chrom , str ( proband_variant_data . variant . pos ), \".\" , proband_variant_data . variant . ref , ( f \"< { proband_variant_data . variant . alt } >\" if proband_variant_data . variant . ref == \"N\" else proband_variant_data . variant . alt ), \"100\" , \"PASS\" , proband_variant_data . info if proband_variant_data . info else \".\" , \"GT\" , genotype_codes [ proband_variant_data . genotype . 
lower ()] + \" \\n \" , ] construct_vcf ( template_vcf_name ) Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Parameters: Name Type Description Default template_vcf_name str Name of the template VCF file. required Returns: Type Description List [ str ] List[str]: The complete spiked VCF file content as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 393 394 395 396 397 398 399 400 401 402 403 def construct_vcf ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: The complete spiked VCF file content as a list of strings. \"\"\" return self . construct_header ( self . construct_vcf_records ( template_vcf_name )) construct_vcf_records ( template_vcf_name ) Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Parameters: Name Type Description Default template_vcf_name str Name of the template VCF file. required Returns: Type Description List [ str ] List[str]: Updated VCF records containing the spiked variants. Source code in src/pheval/prepare/create_spiked_vcf.py 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 def construct_vcf_records ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: Updated VCF records containing the spiked variants. \"\"\" updated_vcf_records = copy ( self . vcf_contents ) for variant in self . proband_causative_variants : variant_entry = self . construct_variant_entry ( variant ) matching_indices = [ i for i , val in enumerate ( updated_vcf_records ) if val . 
split ( \" \\t \" )[ 0 ] == variant_entry [ 0 ] and int ( val . split ( \" \\t \" )[ 1 ]) < int ( variant_entry [ 1 ]) ] if matching_indices : variant_entry_position = matching_indices [ - 1 ] + 1 else : info_log . warning ( f \"Could not find entry position for { variant . variant . chrom } - { variant . variant . pos } -\" f \" { variant . variant . ref } - { variant . variant . alt } in { template_vcf_name } , \" \"inserting at end of VCF contents.\" ) variant_entry_position = len ( updated_vcf_records ) updated_vcf_records . insert ( variant_entry_position , \" \\t \" . join ( variant_entry )) return updated_vcf_records VcfWriter Class for writing VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 class VcfWriter : \"\"\"Class for writing VCF file.\"\"\" def __init__ ( self , vcf_contents : List [ str ], spiked_vcf_file_path : Path , ): \"\"\" Initialise the VcfWriter class. Args: vcf_contents (List[str]): Contents of the VCF file to be written. spiked_vcf_file_path (Path): Path to the spiked VCF file to be created. \"\"\" self . vcf_contents = vcf_contents self . spiked_vcf_file_path = spiked_vcf_file_path def write_gzip ( self ) -> None : \"\"\" Write the VCF contents to a gzipped VCF file. \"\"\" encoded_contents = [ line . encode () for line in self . vcf_contents ] with gzip . open ( self . spiked_vcf_file_path , \"wb\" ) as f : for line in encoded_contents : f . write ( line ) f . close () def write_uncompressed ( self ) -> None : \"\"\" Write the VCF contents to an uncompressed VCF file. \"\"\" with open ( self . spiked_vcf_file_path , \"w\" ) as file : file . writelines ( self . vcf_contents ) file . close () def write_vcf_file ( self ) -> None : \"\"\" Write the VCF file based on compression type. 
Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). \"\"\" self . write_gzip () if is_gzipped ( self . spiked_vcf_file_path ) else self . write_uncompressed () __init__ ( vcf_contents , spiked_vcf_file_path ) Initialise the VcfWriter class. Parameters: Name Type Description Default vcf_contents List [ str ] Contents of the VCF file to be written. required spiked_vcf_file_path Path Path to the spiked VCF file to be created. required Source code in src/pheval/prepare/create_spiked_vcf.py 409 410 411 412 413 414 415 416 417 418 419 420 421 422 def __init__ ( self , vcf_contents : List [ str ], spiked_vcf_file_path : Path , ): \"\"\" Initialise the VcfWriter class. Args: vcf_contents (List[str]): Contents of the VCF file to be written. spiked_vcf_file_path (Path): Path to the spiked VCF file to be created. \"\"\" self . vcf_contents = vcf_contents self . spiked_vcf_file_path = spiked_vcf_file_path write_gzip () Write the VCF contents to a gzipped VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 424 425 426 427 428 429 430 431 432 def write_gzip ( self ) -> None : \"\"\" Write the VCF contents to a gzipped VCF file. \"\"\" encoded_contents = [ line . encode () for line in self . vcf_contents ] with gzip . open ( self . spiked_vcf_file_path , \"wb\" ) as f : for line in encoded_contents : f . write ( line ) f . close () write_uncompressed () Write the VCF contents to an uncompressed VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 434 435 436 437 438 439 440 def write_uncompressed ( self ) -> None : \"\"\" Write the VCF contents to an uncompressed VCF file. \"\"\" with open ( self . spiked_vcf_file_path , \"w\" ) as file : file . writelines ( self . vcf_contents ) file . close () write_vcf_file () Write the VCF file based on compression type. 
Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). Source code in src/pheval/prepare/create_spiked_vcf.py 442 443 444 445 446 447 448 449 def write_vcf_file ( self ) -> None : \"\"\" Write the VCF file based on compression type. Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). \"\"\" self . write_gzip () if is_gzipped ( self . spiked_vcf_file_path ) else self . write_uncompressed () check_variant_assembly ( proband_causative_variants , vcf_header , phenopacket_path ) Check the assembly of the variant assembly against the VCF. Parameters: Name Type Description Default proband_causative_variants List [ ProbandCausativeVariant ] A list of causative variants from the proband. required vcf_header VcfHeader An instance of VcfHeader representing the VCF file's header. required phenopacket_path Path The path to the Phenopacket file. required Raises: Type Description ValueError If there are too many or incompatible genome assemblies found. IncompatibleGenomeAssemblyError If the assembly in the Phenopacket does not match the VCF assembly. Source code in src/pheval/prepare/create_spiked_vcf.py 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 def check_variant_assembly ( proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , phenopacket_path : Path , ) -> None : \"\"\" Check the assembly of the variant assembly against the VCF. Args: proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband. vcf_header (VcfHeader): An instance of VcfHeader representing the VCF file's header. phenopacket_path (Path): The path to the Phenopacket file. 
Raises: ValueError: If there are too many or incompatible genome assemblies found. IncompatibleGenomeAssemblyError: If the assembly in the Phenopacket does not match the VCF assembly. \"\"\" compatible_genome_assembly = { \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" } phenopacket_assembly = list ({ variant . assembly for variant in proband_causative_variants }) if len ( phenopacket_assembly ) > 1 : raise ValueError ( \"Too many genome assemblies!\" ) if phenopacket_assembly [ 0 ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( phenopacket_assembly , phenopacket_path ) if ( phenopacket_assembly [ 0 ] in { \"hg19\" , \"GRCh37\" } and vcf_header . assembly not in { \"hg19\" , \"GRCh37\" } ) or ( phenopacket_assembly [ 0 ] in { \"hg38\" , \"GRCh38\" } and vcf_header . assembly not in { \"hg38\" , \"GRCh38\" } ): raise IncompatibleGenomeAssemblyError ( assembly = phenopacket_assembly , phenopacket = phenopacket_path ) create_spiked_vcf ( output_dir , phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir ) Create a spiked VCF for a Phenopacket. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file. required phenopacket_path Path Path to the Phenopacket file. required hg19_template_vcf Path Path to the hg19 template VCF file (optional). required hg38_template_vcf Path Path to the hg38 template VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). required Raises: Type Description InputError If both hg19_template_vcf and hg38_template_vcf are None. 
Source code in src/pheval/prepare/create_spiked_vcf.py 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 def create_spiked_vcf ( output_dir : Path , phenopacket_path : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create a spiked VCF for a Phenopacket. Args: output_dir (Path): The directory to store the generated spiked VCF file. phenopacket_path (Path): Path to the Phenopacket file. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). Raises: InputError: If both hg19_template_vcf and hg38_template_vcf are None. \"\"\" if hg19_template_vcf is None and hg38_template_vcf is None : raise InputError ( \"Either a hg19 template vcf or hg38 template vcf must be specified\" ) hg19_vcf_info = VcfFile . populate_fields ( hg19_template_vcf ) if hg19_template_vcf else None hg38_vcf_info = VcfFile . populate_fields ( hg38_template_vcf ) if hg38_template_vcf else None spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path ) create_spiked_vcfs ( output_dir , phenopacket_dir , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir ) Create a spiked VCF for a directory of Phenopackets. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file. required phenopacket_dir Path Path to the Phenopacket directory. required hg19_template_vcf Path Path to the template hg19 VCF file (optional). required hg38_template_vcf Path Path to the template hg19 VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). 
required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). required Raises: Type Description InputError If both hg19_template_vcf and hg38_template_vcf are None. Source code in src/pheval/prepare/create_spiked_vcf.py 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 def create_spiked_vcfs ( output_dir : Path , phenopacket_dir : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create a spiked VCF for a directory of Phenopackets. Args: output_dir (Path): The directory to store the generated spiked VCF file. phenopacket_dir (Path): Path to the Phenopacket directory. hg19_template_vcf (Path): Path to the template hg19 VCF file (optional). hg38_template_vcf (Path): Path to the template hg19 VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). Raises: InputError: If both hg19_template_vcf and hg38_template_vcf are None. \"\"\" if ( hg19_template_vcf is None and hg38_template_vcf is None and hg19_vcf_dir is None and hg38_vcf_dir is None ): raise InputError ( \"Need to specify a VCF!\" ) hg19_vcf_info = VcfFile . populate_fields ( hg19_template_vcf ) if hg19_template_vcf else None hg38_vcf_info = VcfFile . populate_fields ( hg38_template_vcf ) if hg38_template_vcf else None for phenopacket_path in files_with_suffix ( phenopacket_dir , \".json\" ): spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path ) generate_spiked_vcf_file ( output_dir , phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) Write spiked VCF contents to a new file. Parameters: Name Type Description Default output_dir Path Path to the directory to store the generated file. 
required phenopacket Union [ Phenopacket , Family ] Phenopacket or Family containing causative variants. required phenopacket_path Path Path to the Phenopacket file. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: Name Type Description File File The generated File object representing the newly created spiked VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 def generate_spiked_vcf_file ( output_dir : Path , phenopacket : Union [ Phenopacket , Family ], phenopacket_path : Path , hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> File : \"\"\" Write spiked VCF contents to a new file. Args: output_dir (Path): Path to the directory to store the generated file. phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants. phenopacket_path (Path): Path to the Phenopacket file. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: File: The generated File object representing the newly created spiked VCF file. \"\"\" output_dir . mkdir ( exist_ok = True ) info_log . info ( f \" Created a directory { output_dir } \" ) vcf_assembly , spiked_vcf = spike_vcf_contents ( phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) spiked_vcf_path = output_dir . joinpath ( phenopacket_path . name . 
replace ( \".json\" , \".vcf.gz\" )) VcfWriter ( spiked_vcf , spiked_vcf_path ) . write_vcf_file () return File ( uri = urllib . parse . unquote ( spiked_vcf_path . as_uri ()), file_attributes = { \"fileFormat\" : \"vcf\" , \"genomeAssembly\" : vcf_assembly }, ) read_vcf ( vcf_file ) Read the contents of a VCF file into memory, handling both uncompressed and gzipped files. Parameters: Name Type Description Default vcf_file Path The path to the VCF file to be read. required Returns: Type Description List [ str ] List[str]: A list containing the lines of the VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 def read_vcf ( vcf_file : Path ) -> List [ str ]: \"\"\" Read the contents of a VCF file into memory, handling both uncompressed and gzipped files. Args: vcf_file (Path): The path to the VCF file to be read. Returns: List[str]: A list containing the lines of the VCF file. \"\"\" open_fn = gzip . open if is_gzipped ( vcf_file ) else open vcf = open_fn ( vcf_file ) vcf_contents = ( [ line . decode () for line in vcf . readlines ()] if is_gzipped ( vcf_file ) else vcf . readlines () ) vcf . close () return vcf_contents select_vcf_template ( phenopacket_path , proband_causative_variants , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) Select the appropriate VCF template based on the assembly information of the proband causative variants. Parameters: Name Type Description Default phenopacket_path Path The path to the Phenopacket file. required proband_causative_variants List [ ProbandCausativeVariant ] A list of causative variants from the proband. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile CF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. 
required Returns: Name Type Description VcfFile VcfFile The selected VCF template file based on the assembly information of the proband causative variants. Source code in src/pheval/prepare/create_spiked_vcf.py 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 def select_vcf_template ( phenopacket_path : Path , proband_causative_variants : List [ ProbandCausativeVariant ], hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> VcfFile : \"\"\" Select the appropriate VCF template based on the assembly information of the proband causative variants. Args: phenopacket_path (Path): The path to the Phenopacket file. proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): CF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: VcfFile: The selected VCF template file based on the assembly information of the proband causative variants. \"\"\" if proband_causative_variants [ 0 ] . assembly in [ \"hg19\" , \"GRCh37\" ]: if hg19_vcf_info : return hg19_vcf_info elif hg19_vcf_dir : return VcfFile . populate_fields ( random . choice ( all_files ( hg19_vcf_dir ))) else : raise InputError ( \"Must specify hg19 template VCF!\" ) elif proband_causative_variants [ 0 ] . assembly in [ \"hg38\" , \"GRCh38\" ]: if hg38_vcf_info : return hg38_vcf_info elif hg38_vcf_dir : return VcfFile . populate_fields ( random . choice ( all_files ( hg38_vcf_dir ))) else : raise InputError ( \"Must specify hg38 template VCF!\" ) else : raise IncompatibleGenomeAssemblyError ( proband_causative_variants [ 0 ] . 
assembly , phenopacket_path ) spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path ) Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket accordingly, and write the updated Phenopacket to the specified output directory. Parameters: Name Type Description Default hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required output_dir Path Directory where the updated Phenopacket will be saved. required phenopacket_path Path Path to the original Phenopacket file. required Returns: Type Description None None Source code in src/pheval/prepare/create_spiked_vcf.py 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 def spike_and_update_phenopacket ( hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , output_dir : Path , phenopacket_path : Path , ) -> None : \"\"\" Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket accordingly, and write the updated Phenopacket to the specified output directory. Args: hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. output_dir (Path): Directory where the updated Phenopacket will be saved. phenopacket_path (Path): Path to the original Phenopacket file. 
Returns: None \"\"\" phenopacket = phenopacket_reader ( phenopacket_path ) spiked_vcf_file_message = generate_spiked_vcf_file ( output_dir , phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , ) updated_phenopacket = PhenopacketRebuilder ( phenopacket ) . add_spiked_vcf_path ( spiked_vcf_file_message ) write_phenopacket ( updated_phenopacket , phenopacket_path ) spike_vcf_contents ( phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) Spike VCF records with variants obtained from a Phenopacket or Family. Parameters: Name Type Description Default phenopacket Union [ Phenopacket , Family ] Phenopacket or Family containing causative variants. required phenopacket_path Path Path to the Phenopacket file. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: Type Description tuple [ str , List [ str ]] A tuple containing: assembly (str): The genome assembly information extracted from VCF header. modified_vcf_contents (List[str]): Modified VCF records with spiked variants. Source code in src/pheval/prepare/create_spiked_vcf.py 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 def spike_vcf_contents ( phenopacket : Union [ Phenopacket , Family ], phenopacket_path : Path , hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> tuple [ str , List [ str ]]: \"\"\" Spike VCF records with variants obtained from a Phenopacket or Family. Args: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants. 
phenopacket_path (Path): Path to the Phenopacket file. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: A tuple containing: assembly (str): The genome assembly information extracted from VCF header. modified_vcf_contents (List[str]): Modified VCF records with spiked variants. \"\"\" phenopacket_causative_variants = PhenopacketUtil ( phenopacket ) . causative_variants () chosen_template_vcf = select_vcf_template ( phenopacket_path , phenopacket_causative_variants , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , ) check_variant_assembly ( phenopacket_causative_variants , chosen_template_vcf . vcf_header , phenopacket_path ) return ( chosen_template_vcf . vcf_header . assembly , VcfSpiker ( chosen_template_vcf . vcf_contents , phenopacket_causative_variants , chosen_template_vcf . vcf_header , ) . construct_vcf ( chosen_template_vcf . vcf_file_name ), ) spike_vcfs ( output_dir , phenopacket_path , phenopacket_dir , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir ) Create spiked VCF from either a Phenopacket or a Phenopacket directory. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file(s). required phenopacket_path Path Path to a single Phenopacket file (optional). required phenopacket_dir Path Path to a directory containing Phenopacket files (optional). required hg19_template_vcf Path Path to the hg19 template VCF file (optional). required hg38_template_vcf Path Path to the hg38 template VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). 
required Source code in src/pheval/prepare/create_spiked_vcf.py 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 def spike_vcfs ( output_dir : Path , phenopacket_path : Path , phenopacket_dir : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create spiked VCF from either a Phenopacket or a Phenopacket directory. Args: output_dir (Path): The directory to store the generated spiked VCF file(s). phenopacket_path (Path): Path to a single Phenopacket file (optional). phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional). hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). \"\"\" if phenopacket_path is not None : create_spiked_vcf ( output_dir , phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , ) elif phenopacket_dir is not None : create_spiked_vcfs ( output_dir , phenopacket_dir , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , )","title":"Create spiked vcf"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfFile","text":"Represents a VCF file with its name, contents, and header information. Attributes: Name Type Description vcf_file_name str The name of the VCF file. vcf_contents List [ str ] The contents of the VCF file. vcf_header VcfHeader The parsed header information of the VCF file. 
Source code in src/pheval/prepare/create_spiked_vcf.py 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 @dataclass class VcfFile : \"\"\" Represents a VCF file with its name, contents, and header information. Attributes: vcf_file_name (str): The name of the VCF file. vcf_contents (List[str]): The contents of the VCF file. vcf_header (VcfHeader): The parsed header information of the VCF file. \"\"\" vcf_file_name : str = None vcf_contents : List [ str ] = None vcf_header : VcfHeader = None @staticmethod def populate_fields ( template_vcf : Path ): \"\"\" Populate the fields of the VcfFile instance using the contents of a template VCF file. Args: template_vcf (Path): The path to the template VCF file. Returns: VcfFile: An instance of VcfFile with populated fields. \"\"\" contents = read_vcf ( template_vcf ) return VcfFile ( template_vcf . name , contents , VcfHeaderParser ( contents ) . parse_vcf_header ())","title":"VcfFile"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields","text":"Populate the fields of the VcfFile instance using the contents of a template VCF file. Parameters: Name Type Description Default template_vcf Path The path to the template VCF file. required Returns: Name Type Description VcfFile An instance of VcfFile with populated fields. Source code in src/pheval/prepare/create_spiked_vcf.py 190 191 192 193 194 195 196 197 198 199 200 201 202 203 @staticmethod def populate_fields ( template_vcf : Path ): \"\"\" Populate the fields of the VcfFile instance using the contents of a template VCF file. Args: template_vcf (Path): The path to the template VCF file. Returns: VcfFile: An instance of VcfFile with populated fields. \"\"\" contents = read_vcf ( template_vcf ) return VcfFile ( template_vcf . name , contents , VcfHeaderParser ( contents ) . 
parse_vcf_header ())","title":"populate_fields()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeader","text":"Data obtained from VCF header. Parameters: Name Type Description Default sample_id str The sample identifier from the VCF header. required assembly str The assembly information obtained from the VCF header. required chr_status bool A boolean indicating whether the VCF denotes chromosomes as chr or not. required Source code in src/pheval/prepare/create_spiked_vcf.py 78 79 80 81 82 83 84 85 86 87 88 89 90 @dataclass class VcfHeader : \"\"\"Data obtained from VCF header. Args: sample_id (str): The sample identifier from the VCF header. assembly (str): The assembly information obtained from the VCF header. chr_status (bool): A boolean indicating whether the VCF denotes chromosomes as chr or not. \"\"\" sample_id : str assembly : str chr_status : bool","title":"VcfHeader"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser","text":"Class for parsing the header of a VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 class VcfHeaderParser : \"\"\"Class for parsing the header of a VCF file.\"\"\" def __init__ ( self , vcf_contents : list [ str ]): \"\"\" Initialise the VcfHeaderParser. Args: vcf_contents (list[str]): The contents of the VCF file as a list of strings. \"\"\" self . vcf_contents = vcf_contents def parse_assembly ( self ) -> tuple [ str , bool ]: \"\"\" Parse the genome assembly and format of vcf_records. Returns: Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). \"\"\" vcf_assembly = {} chr_status = False for line in self . vcf_contents : if line . 
startswith ( \"##contig=<ID\" ): tokens = line . split ( \",\" ) chromosome = re . sub ( r \"^.*?ID=\" , \"\" , [ token for token in tokens if \"ID=\" in token ][ 0 ] ) if \"chr\" in chromosome : chr_status = True chromosome = chromosome . replace ( \"chr\" , \"\" ) contig_length = re . sub ( \"[^0-9]+\" , \"\" , [ token for token in tokens if \"length=\" in token ][ 0 ], ) vcf_assembly [ chromosome ] = int ( contig_length ) vcf_assembly = { i : vcf_assembly [ i ] for i in vcf_assembly if i . isdigit ()} assembly = [ k for k , v in genome_assemblies . items () if v == vcf_assembly ][ 0 ] return assembly , chr_status def parse_sample_id ( self ) -> str : \"\"\" Parse the sample ID of the VCF. Returns: str: The sample ID extracted from the VCF header. \"\"\" for line in self . vcf_contents : if line . startswith ( \"#CHROM\" ): return line . split ( \" \\t \" )[ 9 ] . rstrip () def parse_vcf_header ( self ) -> VcfHeader : \"\"\" Parse the header of the VCF. Returns: VcfHeader: An instance of VcfHeader containing sample ID, assembly, and chromosome status. \"\"\" assembly , chr_status = self . parse_assembly () sample_id = self . parse_sample_id () return VcfHeader ( sample_id , assembly , chr_status )","title":"VcfHeaderParser"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.__init__","text":"Initialise the VcfHeaderParser. Parameters: Name Type Description Default vcf_contents list [ str ] The contents of the VCF file as a list of strings. required Source code in src/pheval/prepare/create_spiked_vcf.py 115 116 117 118 119 120 121 122 def __init__ ( self , vcf_contents : list [ str ]): \"\"\" Initialise the VcfHeaderParser. Args: vcf_contents (list[str]): The contents of the VCF file as a list of strings. \"\"\" self . 
vcf_contents = vcf_contents","title":"__init__()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_assembly","text":"Parse the genome assembly and format of vcf_records. Returns: Type Description tuple [ str , bool ] Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). Source code in src/pheval/prepare/create_spiked_vcf.py 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 def parse_assembly ( self ) -> tuple [ str , bool ]: \"\"\" Parse the genome assembly and format of vcf_records. Returns: Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). \"\"\" vcf_assembly = {} chr_status = False for line in self . vcf_contents : if line . startswith ( \"##contig=<ID\" ): tokens = line . split ( \",\" ) chromosome = re . sub ( r \"^.*?ID=\" , \"\" , [ token for token in tokens if \"ID=\" in token ][ 0 ] ) if \"chr\" in chromosome : chr_status = True chromosome = chromosome . replace ( \"chr\" , \"\" ) contig_length = re . sub ( \"[^0-9]+\" , \"\" , [ token for token in tokens if \"length=\" in token ][ 0 ], ) vcf_assembly [ chromosome ] = int ( contig_length ) vcf_assembly = { i : vcf_assembly [ i ] for i in vcf_assembly if i . isdigit ()} assembly = [ k for k , v in genome_assemblies . items () if v == vcf_assembly ][ 0 ] return assembly , chr_status","title":"parse_assembly()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_sample_id","text":"Parse the sample ID of the VCF. Returns: Name Type Description str str The sample ID extracted from the VCF header. Source code in src/pheval/prepare/create_spiked_vcf.py 152 153 154 155 156 157 158 159 160 161 def parse_sample_id ( self ) -> str : \"\"\" Parse the sample ID of the VCF. Returns: str: The sample ID extracted from the VCF header. \"\"\" for line in self . 
vcf_contents : if line . startswith ( \"#CHROM\" ): return line . split ( \" \\t \" )[ 9 ] . rstrip ()","title":"parse_sample_id()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header","text":"Parse the header of the VCF. Returns: Name Type Description VcfHeader VcfHeader An instance of VcfHeader containing sample ID, assembly, and chromosome status. Source code in src/pheval/prepare/create_spiked_vcf.py 163 164 165 166 167 168 169 170 171 172 def parse_vcf_header ( self ) -> VcfHeader : \"\"\" Parse the header of the VCF. Returns: VcfHeader: An instance of VcfHeader containing sample ID, assembly, and chromosome status. \"\"\" assembly , chr_status = self . parse_assembly () sample_id = self . parse_sample_id () return VcfHeader ( sample_id , assembly , chr_status )","title":"parse_vcf_header()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker","text":"Class for spiking proband variants into template VCF file contents. Source code in src/pheval/prepare/create_spiked_vcf.py 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 class VcfSpiker : \"\"\"Class for spiking proband variants into template VCF file contents.\"\"\" def __init__ ( self , vcf_contents : list [ str ], proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , ): \"\"\" Initialise the VcfSpiker. Args: vcf_contents (List[str]): Contents of the template VCF file. 
proband_causative_variants (List[ProbandCausativeVariant]): List of proband causative variants. vcf_header (VcfHeader): The VCF header information. \"\"\" self . vcf_contents = vcf_contents self . proband_causative_variants = proband_causative_variants self . vcf_header = vcf_header def construct_variant_entry ( self , proband_variant_data : ProbandCausativeVariant ) -> List [ str ]: \"\"\" Construct variant entries. Args: proband_variant_data (ProbandCausativeVariant): Data for the proband variant. Returns: List[str]: Constructed variant entry as a list of strings. \"\"\" genotype_codes = { \"hemizygous\" : \"0/1\" , \"homozygous\" : \"1/1\" , \"heterozygous\" : \"0/1\" , \"compound heterozygous\" : \"0/1\" , } if self . vcf_header . chr_status is True and \"chr\" not in proband_variant_data . variant . chrom : proband_variant_data . variant . chrom = \"chr\" + proband_variant_data . variant . chrom return [ proband_variant_data . variant . chrom , str ( proband_variant_data . variant . pos ), \".\" , proband_variant_data . variant . ref , ( f \"< { proband_variant_data . variant . alt } >\" if proband_variant_data . variant . ref == \"N\" else proband_variant_data . variant . alt ), \"100\" , \"PASS\" , proband_variant_data . info if proband_variant_data . info else \".\" , \"GT\" , genotype_codes [ proband_variant_data . genotype . lower ()] + \" \\n \" , ] def construct_vcf_records ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: Updated VCF records containing the spiked variants. \"\"\" updated_vcf_records = copy ( self . vcf_contents ) for variant in self . proband_causative_variants : variant_entry = self . construct_variant_entry ( variant ) matching_indices = [ i for i , val in enumerate ( updated_vcf_records ) if val . 
split ( \" \\t \" )[ 0 ] == variant_entry [ 0 ] and int ( val . split ( \" \\t \" )[ 1 ]) < int ( variant_entry [ 1 ]) ] if matching_indices : variant_entry_position = matching_indices [ - 1 ] + 1 else : info_log . warning ( f \"Could not find entry position for { variant . variant . chrom } - { variant . variant . pos } -\" f \" { variant . variant . ref } - { variant . variant . alt } in { template_vcf_name } , \" \"inserting at end of VCF contents.\" ) variant_entry_position = len ( updated_vcf_records ) updated_vcf_records . insert ( variant_entry_position , \" \\t \" . join ( variant_entry )) return updated_vcf_records def construct_header ( self , updated_vcf_records : List [ str ]) -> List [ str ]: \"\"\" Construct the header of the VCF. Args: updated_vcf_records (List[str]): Updated VCF records. Returns: List[str]: Constructed header as a list of strings. \"\"\" updated_vcf_file = [] for line in updated_vcf_records : if line . startswith ( \"#\" ): text = line . replace ( self . vcf_header . sample_id , self . proband_causative_variants [ 0 ] . proband_id , ) else : text = line updated_vcf_file . append ( text ) return updated_vcf_file def construct_vcf ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: The complete spiked VCF file content as a list of strings. \"\"\" return self . construct_header ( self . construct_vcf_records ( template_vcf_name ))","title":"VcfSpiker"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__","text":"Initialise the VcfSpiker. Parameters: Name Type Description Default vcf_contents List [ str ] Contents of the template VCF file. required proband_causative_variants List [ ProbandCausativeVariant ] List of proband causative variants. required vcf_header VcfHeader The VCF header information. 
required Source code in src/pheval/prepare/create_spiked_vcf.py 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 def __init__ ( self , vcf_contents : list [ str ], proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , ): \"\"\" Initialise the VcfSpiker. Args: vcf_contents (List[str]): Contents of the template VCF file. proband_causative_variants (List[ProbandCausativeVariant]): List of proband causative variants. vcf_header (VcfHeader): The VCF header information. \"\"\" self . vcf_contents = vcf_contents self . proband_causative_variants = proband_causative_variants self . vcf_header = vcf_header","title":"__init__()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header","text":"Construct the header of the VCF. Parameters: Name Type Description Default updated_vcf_records List [ str ] Updated VCF records. required Returns: Type Description List [ str ] List[str]: Constructed header as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 def construct_header ( self , updated_vcf_records : List [ str ]) -> List [ str ]: \"\"\" Construct the header of the VCF. Args: updated_vcf_records (List[str]): Updated VCF records. Returns: List[str]: Constructed header as a list of strings. \"\"\" updated_vcf_file = [] for line in updated_vcf_records : if line . startswith ( \"#\" ): text = line . replace ( self . vcf_header . sample_id , self . proband_causative_variants [ 0 ] . proband_id , ) else : text = line updated_vcf_file . append ( text ) return updated_vcf_file","title":"construct_header()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry","text":"Construct variant entries. 
Parameters: Name Type Description Default proband_variant_data ProbandCausativeVariant Data for the proband variant. required Returns: Type Description List [ str ] List[str]: Constructed variant entry as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 def construct_variant_entry ( self , proband_variant_data : ProbandCausativeVariant ) -> List [ str ]: \"\"\" Construct variant entries. Args: proband_variant_data (ProbandCausativeVariant): Data for the proband variant. Returns: List[str]: Constructed variant entry as a list of strings. \"\"\" genotype_codes = { \"hemizygous\" : \"0/1\" , \"homozygous\" : \"1/1\" , \"heterozygous\" : \"0/1\" , \"compound heterozygous\" : \"0/1\" , } if self . vcf_header . chr_status is True and \"chr\" not in proband_variant_data . variant . chrom : proband_variant_data . variant . chrom = \"chr\" + proband_variant_data . variant . chrom return [ proband_variant_data . variant . chrom , str ( proband_variant_data . variant . pos ), \".\" , proband_variant_data . variant . ref , ( f \"< { proband_variant_data . variant . alt } >\" if proband_variant_data . variant . ref == \"N\" else proband_variant_data . variant . alt ), \"100\" , \"PASS\" , proband_variant_data . info if proband_variant_data . info else \".\" , \"GT\" , genotype_codes [ proband_variant_data . genotype . lower ()] + \" \\n \" , ]","title":"construct_variant_entry()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf","text":"Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Parameters: Name Type Description Default template_vcf_name str Name of the template VCF file. required Returns: Type Description List [ str ] List[str]: The complete spiked VCF file content as a list of strings. 
Source code in src/pheval/prepare/create_spiked_vcf.py 393 394 395 396 397 398 399 400 401 402 403 def construct_vcf ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: The complete spiked VCF file content as a list of strings. \"\"\" return self . construct_header ( self . construct_vcf_records ( template_vcf_name ))","title":"construct_vcf()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records","text":"Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Parameters: Name Type Description Default template_vcf_name str Name of the template VCF file. required Returns: Type Description List [ str ] List[str]: Updated VCF records containing the spiked variants. Source code in src/pheval/prepare/create_spiked_vcf.py 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 def construct_vcf_records ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: Updated VCF records containing the spiked variants. \"\"\" updated_vcf_records = copy ( self . vcf_contents ) for variant in self . proband_causative_variants : variant_entry = self . construct_variant_entry ( variant ) matching_indices = [ i for i , val in enumerate ( updated_vcf_records ) if val . split ( \" \\t \" )[ 0 ] == variant_entry [ 0 ] and int ( val . split ( \" \\t \" )[ 1 ]) < int ( variant_entry [ 1 ]) ] if matching_indices : variant_entry_position = matching_indices [ - 1 ] + 1 else : info_log . warning ( f \"Could not find entry position for { variant . variant . 
chrom } - { variant . variant . pos } -\" f \" { variant . variant . ref } - { variant . variant . alt } in { template_vcf_name } , \" \"inserting at end of VCF contents.\" ) variant_entry_position = len ( updated_vcf_records ) updated_vcf_records . insert ( variant_entry_position , \" \\t \" . join ( variant_entry )) return updated_vcf_records","title":"construct_vcf_records()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter","text":"Class for writing VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 class VcfWriter : \"\"\"Class for writing VCF file.\"\"\" def __init__ ( self , vcf_contents : List [ str ], spiked_vcf_file_path : Path , ): \"\"\" Initialise the VcfWriter class. Args: vcf_contents (List[str]): Contents of the VCF file to be written. spiked_vcf_file_path (Path): Path to the spiked VCF file to be created. \"\"\" self . vcf_contents = vcf_contents self . spiked_vcf_file_path = spiked_vcf_file_path def write_gzip ( self ) -> None : \"\"\" Write the VCF contents to a gzipped VCF file. \"\"\" encoded_contents = [ line . encode () for line in self . vcf_contents ] with gzip . open ( self . spiked_vcf_file_path , \"wb\" ) as f : for line in encoded_contents : f . write ( line ) f . close () def write_uncompressed ( self ) -> None : \"\"\" Write the VCF contents to an uncompressed VCF file. \"\"\" with open ( self . spiked_vcf_file_path , \"w\" ) as file : file . writelines ( self . vcf_contents ) file . close () def write_vcf_file ( self ) -> None : \"\"\" Write the VCF file based on compression type. Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). \"\"\" self . 
write_gzip () if is_gzipped ( self . spiked_vcf_file_path ) else self . write_uncompressed ()","title":"VcfWriter"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__","text":"Initialise the VcfWriter class. Parameters: Name Type Description Default vcf_contents List [ str ] Contents of the VCF file to be written. required spiked_vcf_file_path Path Path to the spiked VCF file to be created. required Source code in src/pheval/prepare/create_spiked_vcf.py 409 410 411 412 413 414 415 416 417 418 419 420 421 422 def __init__ ( self , vcf_contents : List [ str ], spiked_vcf_file_path : Path , ): \"\"\" Initialise the VcfWriter class. Args: vcf_contents (List[str]): Contents of the VCF file to be written. spiked_vcf_file_path (Path): Path to the spiked VCF file to be created. \"\"\" self . vcf_contents = vcf_contents self . spiked_vcf_file_path = spiked_vcf_file_path","title":"__init__()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_gzip","text":"Write the VCF contents to a gzipped VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 424 425 426 427 428 429 430 431 432 def write_gzip ( self ) -> None : \"\"\" Write the VCF contents to a gzipped VCF file. \"\"\" encoded_contents = [ line . encode () for line in self . vcf_contents ] with gzip . open ( self . spiked_vcf_file_path , \"wb\" ) as f : for line in encoded_contents : f . write ( line ) f . close ()","title":"write_gzip()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_uncompressed","text":"Write the VCF contents to an uncompressed VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 434 435 436 437 438 439 440 def write_uncompressed ( self ) -> None : \"\"\" Write the VCF contents to an uncompressed VCF file. \"\"\" with open ( self . spiked_vcf_file_path , \"w\" ) as file : file . writelines ( self . 
vcf_contents ) file . close ()","title":"write_uncompressed()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_vcf_file","text":"Write the VCF file based on compression type. Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). Source code in src/pheval/prepare/create_spiked_vcf.py 442 443 444 445 446 447 448 449 def write_vcf_file ( self ) -> None : \"\"\" Write the VCF file based on compression type. Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). \"\"\" self . write_gzip () if is_gzipped ( self . spiked_vcf_file_path ) else self . write_uncompressed ()","title":"write_vcf_file()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.check_variant_assembly","text":"Check the assembly of the variant assembly against the VCF. Parameters: Name Type Description Default proband_causative_variants List [ ProbandCausativeVariant ] A list of causative variants from the proband. required vcf_header VcfHeader An instance of VcfHeader representing the VCF file's header. required phenopacket_path Path The path to the Phenopacket file. required Raises: Type Description ValueError If there are too many or incompatible genome assemblies found. IncompatibleGenomeAssemblyError If the assembly in the Phenopacket does not match the VCF assembly. Source code in src/pheval/prepare/create_spiked_vcf.py 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 def check_variant_assembly ( proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , phenopacket_path : Path , ) -> None : \"\"\" Check the assembly of the variant assembly against the VCF. 
Args: proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband. vcf_header (VcfHeader): An instance of VcfHeader representing the VCF file's header. phenopacket_path (Path): The path to the Phenopacket file. Raises: ValueError: If there are too many or incompatible genome assemblies found. IncompatibleGenomeAssemblyError: If the assembly in the Phenopacket does not match the VCF assembly. \"\"\" compatible_genome_assembly = { \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" } phenopacket_assembly = list ({ variant . assembly for variant in proband_causative_variants }) if len ( phenopacket_assembly ) > 1 : raise ValueError ( \"Too many genome assemblies!\" ) if phenopacket_assembly [ 0 ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( phenopacket_assembly , phenopacket_path ) if ( phenopacket_assembly [ 0 ] in { \"hg19\" , \"GRCh37\" } and vcf_header . assembly not in { \"hg19\" , \"GRCh37\" } ) or ( phenopacket_assembly [ 0 ] in { \"hg38\" , \"GRCh38\" } and vcf_header . assembly not in { \"hg38\" , \"GRCh38\" } ): raise IncompatibleGenomeAssemblyError ( assembly = phenopacket_assembly , phenopacket = phenopacket_path )","title":"check_variant_assembly()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.create_spiked_vcf","text":"Create a spiked VCF for a Phenopacket. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file. required phenopacket_path Path Path to the Phenopacket file. required hg19_template_vcf Path Path to the hg19 template VCF file (optional). required hg38_template_vcf Path Path to the hg38 template VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). 
required Raises: Type Description InputError If both hg19_template_vcf and hg38_template_vcf are None. Source code in src/pheval/prepare/create_spiked_vcf.py 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 def create_spiked_vcf ( output_dir : Path , phenopacket_path : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create a spiked VCF for a Phenopacket. Args: output_dir (Path): The directory to store the generated spiked VCF file. phenopacket_path (Path): Path to the Phenopacket file. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). Raises: InputError: If both hg19_template_vcf and hg38_template_vcf are None. \"\"\" if hg19_template_vcf is None and hg38_template_vcf is None : raise InputError ( \"Either a hg19 template vcf or hg38 template vcf must be specified\" ) hg19_vcf_info = VcfFile . populate_fields ( hg19_template_vcf ) if hg19_template_vcf else None hg38_vcf_info = VcfFile . populate_fields ( hg38_template_vcf ) if hg38_template_vcf else None spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path )","title":"create_spiked_vcf()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs","text":"Create a spiked VCF for a directory of Phenopackets. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file. required phenopacket_dir Path Path to the Phenopacket directory. required hg19_template_vcf Path Path to the template hg19 VCF file (optional). 
required hg38_template_vcf Path Path to the template hg19 VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). required Raises: Type Description InputError If both hg19_template_vcf and hg38_template_vcf are None. Source code in src/pheval/prepare/create_spiked_vcf.py 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 def create_spiked_vcfs ( output_dir : Path , phenopacket_dir : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create a spiked VCF for a directory of Phenopackets. Args: output_dir (Path): The directory to store the generated spiked VCF file. phenopacket_dir (Path): Path to the Phenopacket directory. hg19_template_vcf (Path): Path to the template hg19 VCF file (optional). hg38_template_vcf (Path): Path to the template hg19 VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). Raises: InputError: If both hg19_template_vcf and hg38_template_vcf are None. \"\"\" if ( hg19_template_vcf is None and hg38_template_vcf is None and hg19_vcf_dir is None and hg38_vcf_dir is None ): raise InputError ( \"Need to specify a VCF!\" ) hg19_vcf_info = VcfFile . populate_fields ( hg19_template_vcf ) if hg19_template_vcf else None hg38_vcf_info = VcfFile . 
populate_fields ( hg38_template_vcf ) if hg38_template_vcf else None for phenopacket_path in files_with_suffix ( phenopacket_dir , \".json\" ): spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path )","title":"create_spiked_vcfs()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file","text":"Write spiked VCF contents to a new file. Parameters: Name Type Description Default output_dir Path Path to the directory to store the generated file. required phenopacket Union [ Phenopacket , Family ] Phenopacket or Family containing causative variants. required phenopacket_path Path Path to the Phenopacket file. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: Name Type Description File File The generated File object representing the newly created spiked VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 def generate_spiked_vcf_file ( output_dir : Path , phenopacket : Union [ Phenopacket , Family ], phenopacket_path : Path , hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> File : \"\"\" Write spiked VCF contents to a new file. Args: output_dir (Path): Path to the directory to store the generated file. phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants. phenopacket_path (Path): Path to the Phenopacket file. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. 
hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: File: The generated File object representing the newly created spiked VCF file. \"\"\" output_dir . mkdir ( exist_ok = True ) info_log . info ( f \" Created a directory { output_dir } \" ) vcf_assembly , spiked_vcf = spike_vcf_contents ( phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) spiked_vcf_path = output_dir . joinpath ( phenopacket_path . name . replace ( \".json\" , \".vcf.gz\" )) VcfWriter ( spiked_vcf , spiked_vcf_path ) . write_vcf_file () return File ( uri = urllib . parse . unquote ( spiked_vcf_path . as_uri ()), file_attributes = { \"fileFormat\" : \"vcf\" , \"genomeAssembly\" : vcf_assembly }, )","title":"generate_spiked_vcf_file()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.read_vcf","text":"Read the contents of a VCF file into memory, handling both uncompressed and gzipped files. Parameters: Name Type Description Default vcf_file Path The path to the VCF file to be read. required Returns: Type Description List [ str ] List[str]: A list containing the lines of the VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 def read_vcf ( vcf_file : Path ) -> List [ str ]: \"\"\" Read the contents of a VCF file into memory, handling both uncompressed and gzipped files. Args: vcf_file (Path): The path to the VCF file to be read. Returns: List[str]: A list containing the lines of the VCF file. \"\"\" open_fn = gzip . open if is_gzipped ( vcf_file ) else open vcf = open_fn ( vcf_file ) vcf_contents = ( [ line . decode () for line in vcf . readlines ()] if is_gzipped ( vcf_file ) else vcf . readlines () ) vcf . 
close () return vcf_contents","title":"read_vcf()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.select_vcf_template","text":"Select the appropriate VCF template based on the assembly information of the proband causative variants. Parameters: Name Type Description Default phenopacket_path Path The path to the Phenopacket file. required proband_causative_variants List [ ProbandCausativeVariant ] A list of causative variants from the proband. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile CF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: Name Type Description VcfFile VcfFile The selected VCF template file based on the assembly information of the proband causative variants. Source code in src/pheval/prepare/create_spiked_vcf.py 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 def select_vcf_template ( phenopacket_path : Path , proband_causative_variants : List [ ProbandCausativeVariant ], hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> VcfFile : \"\"\" Select the appropriate VCF template based on the assembly information of the proband causative variants. Args: phenopacket_path (Path): The path to the Phenopacket file. proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): CF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. 
Returns: VcfFile: The selected VCF template file based on the assembly information of the proband causative variants. \"\"\" if proband_causative_variants [ 0 ] . assembly in [ \"hg19\" , \"GRCh37\" ]: if hg19_vcf_info : return hg19_vcf_info elif hg19_vcf_dir : return VcfFile . populate_fields ( random . choice ( all_files ( hg19_vcf_dir ))) else : raise InputError ( \"Must specify hg19 template VCF!\" ) elif proband_causative_variants [ 0 ] . assembly in [ \"hg38\" , \"GRCh38\" ]: if hg38_vcf_info : return hg38_vcf_info elif hg38_vcf_dir : return VcfFile . populate_fields ( random . choice ( all_files ( hg38_vcf_dir ))) else : raise InputError ( \"Must specify hg38 template VCF!\" ) else : raise IncompatibleGenomeAssemblyError ( proband_causative_variants [ 0 ] . assembly , phenopacket_path )","title":"select_vcf_template()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket","text":"Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket accordingly, and write the updated Phenopacket to the specified output directory. Parameters: Name Type Description Default hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required output_dir Path Directory where the updated Phenopacket will be saved. required phenopacket_path Path Path to the original Phenopacket file. 
required Returns: Type Description None None Source code in src/pheval/prepare/create_spiked_vcf.py 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 def spike_and_update_phenopacket ( hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , output_dir : Path , phenopacket_path : Path , ) -> None : \"\"\" Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket accordingly, and write the updated Phenopacket to the specified output directory. Args: hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. output_dir (Path): Directory where the updated Phenopacket will be saved. phenopacket_path (Path): Path to the original Phenopacket file. Returns: None \"\"\" phenopacket = phenopacket_reader ( phenopacket_path ) spiked_vcf_file_message = generate_spiked_vcf_file ( output_dir , phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , ) updated_phenopacket = PhenopacketRebuilder ( phenopacket ) . add_spiked_vcf_path ( spiked_vcf_file_message ) write_phenopacket ( updated_phenopacket , phenopacket_path )","title":"spike_and_update_phenopacket()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.spike_vcf_contents","text":"Spike VCF records with variants obtained from a Phenopacket or Family. Parameters: Name Type Description Default phenopacket Union [ Phenopacket , Family ] Phenopacket or Family containing causative variants. required phenopacket_path Path Path to the Phenopacket file. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. 
required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: Type Description tuple [ str , List [ str ]] A tuple containing: assembly (str): The genome assembly information extracted from VCF header. modified_vcf_contents (List[str]): Modified VCF records with spiked variants. Source code in src/pheval/prepare/create_spiked_vcf.py 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 def spike_vcf_contents ( phenopacket : Union [ Phenopacket , Family ], phenopacket_path : Path , hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> tuple [ str , List [ str ]]: \"\"\" Spike VCF records with variants obtained from a Phenopacket or Family. Args: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants. phenopacket_path (Path): Path to the Phenopacket file. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: A tuple containing: assembly (str): The genome assembly information extracted from VCF header. modified_vcf_contents (List[str]): Modified VCF records with spiked variants. \"\"\" phenopacket_causative_variants = PhenopacketUtil ( phenopacket ) . causative_variants () chosen_template_vcf = select_vcf_template ( phenopacket_path , phenopacket_causative_variants , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , ) check_variant_assembly ( phenopacket_causative_variants , chosen_template_vcf . vcf_header , phenopacket_path ) return ( chosen_template_vcf . vcf_header . 
assembly , VcfSpiker ( chosen_template_vcf . vcf_contents , phenopacket_causative_variants , chosen_template_vcf . vcf_header , ) . construct_vcf ( chosen_template_vcf . vcf_file_name ), )","title":"spike_vcf_contents()"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.spike_vcfs","text":"Create spiked VCF from either a Phenopacket or a Phenopacket directory. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file(s). required phenopacket_path Path Path to a single Phenopacket file (optional). required phenopacket_dir Path Path to a directory containing Phenopacket files (optional). required hg19_template_vcf Path Path to the hg19 template VCF file (optional). required hg38_template_vcf Path Path to the hg38 template VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). required Source code in src/pheval/prepare/create_spiked_vcf.py 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 def spike_vcfs ( output_dir : Path , phenopacket_path : Path , phenopacket_dir : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create spiked VCF from either a Phenopacket or a Phenopacket directory. Args: output_dir (Path): The directory to store the generated spiked VCF file(s). phenopacket_path (Path): Path to a single Phenopacket file (optional). phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional). hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). 
hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). \"\"\" if phenopacket_path is not None : create_spiked_vcf ( output_dir , phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , ) elif phenopacket_dir is not None : create_spiked_vcfs ( output_dir , phenopacket_dir , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , )","title":"spike_vcfs()"},{"location":"api/pheval/prepare/custom_exceptions/","text":"InputError Bases: Exception Exception raised for missing required inputs. Source code in src/pheval/prepare/custom_exceptions.py 4 5 6 7 8 9 10 11 12 13 class InputError ( Exception ): \"\"\"Exception raised for missing required inputs.\"\"\" def __init__ ( self , file , message = \"Missing required input\" ): self . file : str = file self . message : str = message super () . __init__ ( self . message ) def __str__ ( self ): return f \" { self . message } -> { self . file } \" MutuallyExclusiveOptionError Bases: Option Exception raised for when Source code in src/pheval/prepare/custom_exceptions.py 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 class MutuallyExclusiveOptionError ( Option ): \"\"\"Exception raised for when\"\"\" def __init__ ( self , * args , ** kwargs ): self . mutually_exclusive = set ( kwargs . pop ( \"mutually_exclusive\" , [])) help_ = kwargs . get ( \"help\" , \"\" ) if self . mutually_exclusive : ex_str = \", \" . join ( self . mutually_exclusive ) kwargs [ \"help\" ] = help_ + ( \" NOTE: This argument is mutually exclusive with \" \" arguments: [\" + ex_str + \"].\" ) super ( MutuallyExclusiveOptionError , self ) . __init__ ( * args , ** kwargs ) def handle_parse_result ( self , ctx , opts , args ): if self . mutually_exclusive . intersection ( opts ) and self . name in opts : raise UsageError ( \"Illegal usage: ` {} ` is mutually exclusive with \" \"arguments ` {} `.\" . format ( self . name , \", \" . join ( self . 
mutually_exclusive )) ) return super ( MutuallyExclusiveOptionError , self ) . handle_parse_result ( ctx , opts , args )","title":"Custom exceptions"},{"location":"api/pheval/prepare/custom_exceptions/#src.pheval.prepare.custom_exceptions.InputError","text":"Bases: Exception Exception raised for missing required inputs. Source code in src/pheval/prepare/custom_exceptions.py 4 5 6 7 8 9 10 11 12 13 class InputError ( Exception ): \"\"\"Exception raised for missing required inputs.\"\"\" def __init__ ( self , file , message = \"Missing required input\" ): self . file : str = file self . message : str = message super () . __init__ ( self . message ) def __str__ ( self ): return f \" { self . message } -> { self . file } \"","title":"InputError"},{"location":"api/pheval/prepare/custom_exceptions/#src.pheval.prepare.custom_exceptions.MutuallyExclusiveOptionError","text":"Bases: Option Exception raised for when Source code in src/pheval/prepare/custom_exceptions.py 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 class MutuallyExclusiveOptionError ( Option ): \"\"\"Exception raised for when\"\"\" def __init__ ( self , * args , ** kwargs ): self . mutually_exclusive = set ( kwargs . pop ( \"mutually_exclusive\" , [])) help_ = kwargs . get ( \"help\" , \"\" ) if self . mutually_exclusive : ex_str = \", \" . join ( self . mutually_exclusive ) kwargs [ \"help\" ] = help_ + ( \" NOTE: This argument is mutually exclusive with \" \" arguments: [\" + ex_str + \"].\" ) super ( MutuallyExclusiveOptionError , self ) . __init__ ( * args , ** kwargs ) def handle_parse_result ( self , ctx , opts , args ): if self . mutually_exclusive . intersection ( opts ) and self . name in opts : raise UsageError ( \"Illegal usage: ` {} ` is mutually exclusive with \" \"arguments ` {} `.\" . format ( self . name , \", \" . join ( self . mutually_exclusive )) ) return super ( MutuallyExclusiveOptionError , self ) . 
handle_parse_result ( ctx , opts , args )","title":"MutuallyExclusiveOptionError"},{"location":"api/pheval/prepare/prepare_corpus/","text":"prepare_corpus ( phenopacket_dir , variant_analysis , gene_analysis , disease_analysis , gene_identifier , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , output_dir ) Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Parameters: Name Type Description Default phenopacket_dir Path The path to the directory containing Phenopackets. required variant_analysis bool If True, check for complete variant records in the Phenopackets. required gene_analysis bool If True, check for complete gene records in the Phenopackets. required disease_analysis bool If True, check for complete disease records in the Phenopackets. required gene_identifier str Identifier for updating gene identifiers, if applicable. required hg19_template_vcf Path Path to the hg19 template VCF file (optional), to spike variants into required hg38_template_vcf Path Path to the hg38 template VCF file (optional), to spike variants into required hg19_vcf_dir Path Path to the directory containing hg19 template VCF files (optional). required hg38_vcf_dir Path Path to the directory containing hg38 template VCF files (optional). required output_dir Path The directory to save the prepared Phenopackets and, optionally, VCF files. required Notes To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. 
Source code in src/pheval/prepare/prepare_corpus.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 def prepare_corpus ( phenopacket_dir : Path , variant_analysis : bool , gene_analysis : bool , disease_analysis : bool , gene_identifier : str , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , output_dir : Path , ) -> None : \"\"\" Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Args: phenopacket_dir (Path): The path to the directory containing Phenopackets. variant_analysis (bool): If True, check for complete variant records in the Phenopackets. gene_analysis (bool): If True, check for complete gene records in the Phenopackets. disease_analysis (bool): If True, check for complete disease records in the Phenopackets. gene_identifier (str): Identifier for updating gene identifiers, if applicable. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional), to spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required. hg38_template_vcf (Path): Path to the hg38 template VCF file (optional), to spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required. hg19_vcf_dir (Path): Path to the directory containing hg19 template VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing hg38 template VCF files (optional). output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files. Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. \"\"\" output_dir . joinpath ( \"phenopackets\" ) . 
mkdir ( exist_ok = True , parents = True ) for phenopacket_path in all_files ( phenopacket_dir ): phenopacket_util = PhenopacketUtil ( phenopacket_reader ( phenopacket_path )) if not phenopacket_util . observed_phenotypic_features (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to no observed phenotypic features.\" ) continue if variant_analysis : if phenopacket_util . check_incomplete_variant_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing variant fields.\" ) continue if gene_analysis : if phenopacket_util . check_incomplete_gene_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing gene fields.\" ) continue if disease_analysis : if phenopacket_util . check_incomplete_disease_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing disease fields.\" ) continue if hg19_template_vcf or hg38_template_vcf : output_dir . joinpath ( \"vcf\" ) . mkdir ( exist_ok = True ) create_spiked_vcf ( output_dir . joinpath ( \"vcf\" ), phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , ) if gene_identifier : create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir . joinpath ( \"phenopackets\" ) ) else : # if not updating phenopacket gene identifiers then copy phenopacket as is to output directory shutil . copy ( phenopacket_path , output_dir . joinpath ( f \"phenopackets/ { phenopacket_path . name } \" ) )","title":"Prepare corpus"},{"location":"api/pheval/prepare/prepare_corpus/#src.pheval.prepare.prepare_corpus.prepare_corpus","text":"Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Parameters: Name Type Description Default phenopacket_dir Path The path to the directory containing Phenopackets. 
required variant_analysis bool If True, check for complete variant records in the Phenopackets. required gene_analysis bool If True, check for complete gene records in the Phenopackets. required disease_analysis bool If True, check for complete disease records in the Phenopackets. required gene_identifier str Identifier for updating gene identifiers, if applicable. required hg19_template_vcf Path Path to the hg19 template VCF file (optional), to spike variants into required hg38_template_vcf Path Path to the hg38 template VCF file (optional), to spike variants into required hg19_vcf_dir Path Path to the directory containing hg19 template VCF files (optional). required hg38_vcf_dir Path Path to the directory containing hg38 template VCF files (optional). required output_dir Path The directory to save the prepared Phenopackets and, optionally, VCF files. required Notes To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. Source code in src/pheval/prepare/prepare_corpus.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 def prepare_corpus ( phenopacket_dir : Path , variant_analysis : bool , gene_analysis : bool , disease_analysis : bool , gene_identifier : str , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , output_dir : Path , ) -> None : \"\"\" Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Args: phenopacket_dir (Path): The path to the directory containing Phenopackets. variant_analysis (bool): If True, check for complete variant records in the Phenopackets. gene_analysis (bool): If True, check for complete gene records in the Phenopackets. 
disease_analysis (bool): If True, check for complete disease records in the Phenopackets. gene_identifier (str): Identifier for updating gene identifiers, if applicable. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional), to spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required. hg38_template_vcf (Path): Path to the hg38 template VCF file (optional), to spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required. hg19_vcf_dir (Path): Path to the directory containing hg19 template VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing hg38 template VCF files (optional). output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files. Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. \"\"\" output_dir . joinpath ( \"phenopackets\" ) . mkdir ( exist_ok = True , parents = True ) for phenopacket_path in all_files ( phenopacket_dir ): phenopacket_util = PhenopacketUtil ( phenopacket_reader ( phenopacket_path )) if not phenopacket_util . observed_phenotypic_features (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to no observed phenotypic features.\" ) continue if variant_analysis : if phenopacket_util . check_incomplete_variant_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing variant fields.\" ) continue if gene_analysis : if phenopacket_util . check_incomplete_gene_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing gene fields.\" ) continue if disease_analysis : if phenopacket_util . check_incomplete_disease_record (): info_log . warning ( f \"Removed { phenopacket_path . 
name } from the corpus due to missing disease fields.\" ) continue if hg19_template_vcf or hg38_template_vcf : output_dir . joinpath ( \"vcf\" ) . mkdir ( exist_ok = True ) create_spiked_vcf ( output_dir . joinpath ( \"vcf\" ), phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , ) if gene_identifier : create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir . joinpath ( \"phenopackets\" ) ) else : # if not updating phenopacket gene identifiers then copy phenopacket as is to output directory shutil . copy ( phenopacket_path , output_dir . joinpath ( f \"phenopackets/ { phenopacket_path . name } \" ) )","title":"prepare_corpus()"},{"location":"api/pheval/prepare/update_phenopacket/","text":"create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir ) Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket. Parameters: Name Type Description Default gene_identifier str Identifier used to update the gene context. required phenopacket_path Path The path to the input Phenopacket file. required output_dir Path The directory where the updated Phenopacket will be written. required Notes The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 def create_updated_phenopacket ( gene_identifier : str , phenopacket_path : Path , output_dir : Path ) -> None : \"\"\" Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket. Args: gene_identifier (str): Identifier used to update the gene context. phenopacket_path (Path): The path to the input Phenopacket file. output_dir (Path): The directory where the updated Phenopacket will be written. 
Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. \"\"\" hgnc_data = create_hgnc_dict () updated_phenopacket = update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) write_phenopacket ( updated_phenopacket , output_dir . joinpath ( phenopacket_path . name )) create_updated_phenopackets ( gene_identifier , phenopacket_dir , output_dir ) Update the gene context within the interpretations for a directory of Phenopackets and writes the updated Phenopackets. Parameters: Name Type Description Default gene_identifier str Identifier used to update the gene context. required phenopacket_dir Path The path to the input Phenopacket directory. required output_dir Path The directory where the updated Phenopackets will be written. required Notes The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 def create_updated_phenopackets ( gene_identifier : str , phenopacket_dir : Path , output_dir : Path ) -> None : \"\"\" Update the gene context within the interpretations for a directory of Phenopackets and writes the updated Phenopackets. Args: gene_identifier (str): Identifier used to update the gene context. phenopacket_dir (Path): The path to the input Phenopacket directory. output_dir (Path): The directory where the updated Phenopackets will be written. Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. 
\"\"\" hgnc_data = create_hgnc_dict () for phenopacket_path in all_files ( phenopacket_dir ): updated_phenopacket = update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) write_phenopacket ( updated_phenopacket , output_dir . joinpath ( phenopacket_path . name )) update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) Update the gene context of the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path The path to the Phenopacket file. required gene_identifier str Identifier to update the gene context. required hgnc_data defaultdict The HGNC data used for updating. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The updated Phenopacket or Family. Notes This function updates the gene context within the Phenopacket or Family instance. The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 def update_outdated_gene_context ( phenopacket_path : Path , gene_identifier : str , hgnc_data : defaultdict ) -> Union [ Phenopacket , Family ]: \"\"\" Update the gene context of the Phenopacket. Args: phenopacket_path (Path): The path to the Phenopacket file. gene_identifier (str): Identifier to update the gene context. hgnc_data (defaultdict): The HGNC data used for updating. Returns: Union[Phenopacket, Family]: The updated Phenopacket or Family. Notes: This function updates the gene context within the Phenopacket or Family instance. The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. 
\"\"\" phenopacket = phenopacket_reader ( phenopacket_path ) interpretations = PhenopacketUtil ( phenopacket ) . interpretations () updated_interpretations = GeneIdentifierUpdater ( hgnc_data = hgnc_data , gene_identifier = gene_identifier ) . update_genomic_interpretations_gene_identifier ( interpretations , phenopacket_path ) return PhenopacketRebuilder ( phenopacket ) . update_interpretations ( updated_interpretations ) update_phenopackets ( gene_identifier , phenopacket_path , phenopacket_dir , output_dir ) Update the gene identifiers in either a single phenopacket or a directory of phenopackets. Parameters: Name Type Description Default gene_identifier str The gene identifier to be updated. required phenopacket_path Path The path to a single Phenopacket file. required phenopacket_dir Path The directory containing multiple Phenopacket files. required output_dir Path The output directory to save the updated Phenopacket files. required Notes The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 def update_phenopackets ( gene_identifier : str , phenopacket_path : Path , phenopacket_dir : Path , output_dir : Path ) -> None : \"\"\" Update the gene identifiers in either a single phenopacket or a directory of phenopackets. Args: gene_identifier (str): The gene identifier to be updated. phenopacket_path (Path): The path to a single Phenopacket file. phenopacket_dir (Path): The directory containing multiple Phenopacket files. output_dir (Path): The output directory to save the updated Phenopacket files. Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. 
We recommend using the ENSEMBL namespace to describe the gene identifiers. \"\"\" output_dir . mkdir ( exist_ok = True ) if phenopacket_path is not None : create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir ) elif phenopacket_dir is not None : create_updated_phenopackets ( gene_identifier , phenopacket_dir , output_dir )","title":"Update phenopacket"},{"location":"api/pheval/prepare/update_phenopacket/#src.pheval.prepare.update_phenopacket.create_updated_phenopacket","text":"Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket. Parameters: Name Type Description Default gene_identifier str Identifier used to update the gene context. required phenopacket_path Path The path to the input Phenopacket file. required output_dir Path The directory where the updated Phenopacket will be written. required Notes The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 def create_updated_phenopacket ( gene_identifier : str , phenopacket_path : Path , output_dir : Path ) -> None : \"\"\" Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket. Args: gene_identifier (str): Identifier used to update the gene context. phenopacket_path (Path): The path to the input Phenopacket file. output_dir (Path): The directory where the updated Phenopacket will be written. Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. 
\"\"\" hgnc_data = create_hgnc_dict () updated_phenopacket = update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) write_phenopacket ( updated_phenopacket , output_dir . joinpath ( phenopacket_path . name ))","title":"create_updated_phenopacket()"},{"location":"api/pheval/prepare/update_phenopacket/#src.pheval.prepare.update_phenopacket.create_updated_phenopackets","text":"Update the gene context within the interpretations for a directory of Phenopackets and writes the updated Phenopackets. Parameters: Name Type Description Default gene_identifier str Identifier used to update the gene context. required phenopacket_dir Path The path to the input Phenopacket directory. required output_dir Path The directory where the updated Phenopackets will be written. required Notes The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 def create_updated_phenopackets ( gene_identifier : str , phenopacket_dir : Path , output_dir : Path ) -> None : \"\"\" Update the gene context within the interpretations for a directory of Phenopackets and writes the updated Phenopackets. Args: gene_identifier (str): Identifier used to update the gene context. phenopacket_dir (Path): The path to the input Phenopacket directory. output_dir (Path): The directory where the updated Phenopackets will be written. Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. 
\"\"\" hgnc_data = create_hgnc_dict () for phenopacket_path in all_files ( phenopacket_dir ): updated_phenopacket = update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) write_phenopacket ( updated_phenopacket , output_dir . joinpath ( phenopacket_path . name ))","title":"create_updated_phenopackets()"},{"location":"api/pheval/prepare/update_phenopacket/#src.pheval.prepare.update_phenopacket.update_outdated_gene_context","text":"Update the gene context of the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path The path to the Phenopacket file. required gene_identifier str Identifier to update the gene context. required hgnc_data defaultdict The HGNC data used for updating. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The updated Phenopacket or Family. Notes This function updates the gene context within the Phenopacket or Family instance. The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 def update_outdated_gene_context ( phenopacket_path : Path , gene_identifier : str , hgnc_data : defaultdict ) -> Union [ Phenopacket , Family ]: \"\"\" Update the gene context of the Phenopacket. Args: phenopacket_path (Path): The path to the Phenopacket file. gene_identifier (str): Identifier to update the gene context. hgnc_data (defaultdict): The HGNC data used for updating. Returns: Union[Phenopacket, Family]: The updated Phenopacket or Family. Notes: This function updates the gene context within the Phenopacket or Family instance. The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. 
We recommend using the ENSEMBL namespace to describe the gene identifiers. \"\"\" phenopacket = phenopacket_reader ( phenopacket_path ) interpretations = PhenopacketUtil ( phenopacket ) . interpretations () updated_interpretations = GeneIdentifierUpdater ( hgnc_data = hgnc_data , gene_identifier = gene_identifier ) . update_genomic_interpretations_gene_identifier ( interpretations , phenopacket_path ) return PhenopacketRebuilder ( phenopacket ) . update_interpretations ( updated_interpretations )","title":"update_outdated_gene_context()"},{"location":"api/pheval/prepare/update_phenopacket/#src.pheval.prepare.update_phenopacket.update_phenopackets","text":"Update the gene identifiers in either a single phenopacket or a directory of phenopackets. Parameters: Name Type Description Default gene_identifier str The gene identifier to be updated. required phenopacket_path Path The path to a single Phenopacket file. required phenopacket_dir Path The directory containing multiple Phenopacket files. required output_dir Path The output directory to save the updated Phenopacket files. required Notes The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 def update_phenopackets ( gene_identifier : str , phenopacket_path : Path , phenopacket_dir : Path , output_dir : Path ) -> None : \"\"\" Update the gene identifiers in either a single phenopacket or a directory of phenopackets. Args: gene_identifier (str): The gene identifier to be updated. phenopacket_path (Path): The path to a single Phenopacket file. phenopacket_dir (Path): The directory containing multiple Phenopacket files. output_dir (Path): The output directory to save the updated Phenopacket files. 
Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. \"\"\" output_dir . mkdir ( exist_ok = True ) if phenopacket_path is not None : create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir ) elif phenopacket_dir is not None : create_updated_phenopackets ( gene_identifier , phenopacket_dir , output_dir )","title":"update_phenopackets()"},{"location":"api/pheval/runners/runner/","text":"Runners Module DefaultPhEvalRunner Bases: PhEvalRunner DefaultPhEvalRunner Parameters: Name Type Description Default PhEvalRunner PhEvalRunner Abstract PhEvalRunnerClass required Source code in src/pheval/runners/runner.py 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 class DefaultPhEvalRunner ( PhEvalRunner ): \"\"\"DefaultPhEvalRunner Args: PhEvalRunner (PhEvalRunner): Abstract PhEvalRunnerClass \"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): print ( \"preparing\" ) def run ( self ): print ( \"running\" ) def post_process ( self ): print ( \"post processing\" ) PhEvalRunner dataclass Bases: ABC PhEvalRunner Class Source code in src/pheval/runners/runner.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 @dataclass class PhEvalRunner ( ABC ): \"\"\"PhEvalRunner Class\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str directory_path = None input_dir_config = None _meta_data = None 
__raw_results_dir = \"raw_results/\" __pheval_gene_results_dir = \"pheval_gene_results/\" __pheval_variant_results_dir = \"pheval_variant_results/\" __pheval_disease_results_dir = \"pheval_disease_results/\" __tool_input_commands_dir = \"tool_input_commands/\" __run_meta_data_file = \"results.yml\" def __post_init__ ( self ): self . input_dir_config = parse_input_dir_config ( self . input_dir ) def _get_tool ( self ): return self . input_dir_config . tool def _get_variant_analysis ( self ): return self . input_dir_config . variant_analysis def _get_gene_analysis ( self ): return self . input_dir_config . gene_analysis def _get_disease_analysis ( self ): return self . input_dir_config . disease_analysis @property def tool_input_commands_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __tool_input_commands_dir ) @tool_input_commands_dir . setter def tool_input_commands_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def raw_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __raw_results_dir ) @raw_results_dir . setter def raw_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_gene_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_gene_results_dir ) @pheval_gene_results_dir . setter def pheval_gene_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_variant_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_variant_results_dir ) @pheval_variant_results_dir . setter def pheval_variant_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_disease_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_disease_results_dir ) @pheval_disease_results_dir . setter def pheval_disease_results_dir ( self , directory_path ): self . 
directory_path = Path ( directory_path ) def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True ) @property def meta_data ( self ): self . _meta_data = BasicOutputRunMetaData ( tool = self . input_dir_config . tool , tool_version = self . version , config = f \" { Path ( self . input_dir ) . parent . name } / { Path ( self . input_dir ) . name } \" , run_timestamp = datetime . now () . timestamp (), corpus = f \" { Path ( self . testdata_dir ) . parent . name } / { Path ( self . testdata_dir ) . name } \" , ) return self . _meta_data @meta_data . setter def meta_data ( self , meta_data ): self . _meta_data = meta_data @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" @abstractmethod def run ( self ): \"\"\"run\"\"\" @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data build_output_directory_structure () build output directory structure Source code in src/pheval/runners/runner.py 87 88 89 90 91 92 93 94 95 96 def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . 
mkdir ( exist_ok = True ) construct_meta_data () Construct run output meta data Source code in src/pheval/runners/runner.py 125 126 127 def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data post_process () abstractmethod post_process Source code in src/pheval/runners/runner.py 121 122 123 @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" prepare () abstractmethod prepare Source code in src/pheval/runners/runner.py 113 114 115 @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" run () abstractmethod run Source code in src/pheval/runners/runner.py 117 118 119 @abstractmethod def run ( self ): \"\"\"run\"\"\"","title":"Runner"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.DefaultPhEvalRunner","text":"Bases: PhEvalRunner DefaultPhEvalRunner Parameters: Name Type Description Default PhEvalRunner PhEvalRunner Abstract PhEvalRunnerClass required Source code in src/pheval/runners/runner.py 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 class DefaultPhEvalRunner ( PhEvalRunner ): \"\"\"DefaultPhEvalRunner Args: PhEvalRunner (PhEvalRunner): Abstract PhEvalRunnerClass \"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): print ( \"preparing\" ) def run ( self ): print ( \"running\" ) def post_process ( self ): print ( \"post processing\" )","title":"DefaultPhEvalRunner"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner","text":"Bases: ABC PhEvalRunner Class Source code in src/pheval/runners/runner.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 
118 119 120 121 122 123 124 125 126 127 @dataclass class PhEvalRunner ( ABC ): \"\"\"PhEvalRunner Class\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str directory_path = None input_dir_config = None _meta_data = None __raw_results_dir = \"raw_results/\" __pheval_gene_results_dir = \"pheval_gene_results/\" __pheval_variant_results_dir = \"pheval_variant_results/\" __pheval_disease_results_dir = \"pheval_disease_results/\" __tool_input_commands_dir = \"tool_input_commands/\" __run_meta_data_file = \"results.yml\" def __post_init__ ( self ): self . input_dir_config = parse_input_dir_config ( self . input_dir ) def _get_tool ( self ): return self . input_dir_config . tool def _get_variant_analysis ( self ): return self . input_dir_config . variant_analysis def _get_gene_analysis ( self ): return self . input_dir_config . gene_analysis def _get_disease_analysis ( self ): return self . input_dir_config . disease_analysis @property def tool_input_commands_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __tool_input_commands_dir ) @tool_input_commands_dir . setter def tool_input_commands_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def raw_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __raw_results_dir ) @raw_results_dir . setter def raw_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_gene_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_gene_results_dir ) @pheval_gene_results_dir . setter def pheval_gene_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_variant_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_variant_results_dir ) @pheval_variant_results_dir . setter def pheval_variant_results_dir ( self , directory_path ): self . 
directory_path = Path ( directory_path ) @property def pheval_disease_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_disease_results_dir ) @pheval_disease_results_dir . setter def pheval_disease_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True ) @property def meta_data ( self ): self . _meta_data = BasicOutputRunMetaData ( tool = self . input_dir_config . tool , tool_version = self . version , config = f \" { Path ( self . input_dir ) . parent . name } / { Path ( self . input_dir ) . name } \" , run_timestamp = datetime . now () . timestamp (), corpus = f \" { Path ( self . testdata_dir ) . parent . name } / { Path ( self . testdata_dir ) . name } \" , ) return self . _meta_data @meta_data . setter def meta_data ( self , meta_data ): self . _meta_data = meta_data @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" @abstractmethod def run ( self ): \"\"\"run\"\"\" @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data","title":"PhEvalRunner"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.build_output_directory_structure","text":"build output directory structure Source code in src/pheval/runners/runner.py 87 88 89 90 91 92 93 94 95 96 def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . 
tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True )","title":"build_output_directory_structure()"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.construct_meta_data","text":"Construct run output meta data Source code in src/pheval/runners/runner.py 125 126 127 def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data","title":"construct_meta_data()"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.post_process","text":"post_process Source code in src/pheval/runners/runner.py 121 122 123 @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\"","title":"post_process()"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.prepare","text":"prepare Source code in src/pheval/runners/runner.py 113 114 115 @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\"","title":"prepare()"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.run","text":"run Source code in src/pheval/runners/runner.py 117 118 119 @abstractmethod def run ( self ): \"\"\"run\"\"\"","title":"run()"},{"location":"api/pheval/utils/exomiser/","text":"semsim_to_exomiserdb ( input_path , object_prefix , subject_prefix , db_path ) ingests semsim file into exomiser phenotypic database Parameters: Name Type Description Default input_path Path semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv required object_prefix str object prefix. e.g. MP required subject_prefix str subject prefix e.g HP required db_path Path Exomiser Phenotypic Database Folder Path. (e.g. 
/exomiser_folder/2209_phenotype/2209_phenotype/) required Source code in src/pheval/utils/exomiser.py 6 7 8 9 10 11 12 13 14 15 16 def semsim_to_exomiserdb ( input_path : Path , object_prefix : str , subject_prefix : str , db_path : Path ): \"\"\"ingests semsim file into exomiser phenotypic database Args: input_path (Path): semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv object_prefix (str): object prefix. e.g. MP subject_prefix (str): subject prefix e.g HP db_path (Path): Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/) \"\"\" exomiserdb = ExomiserDB ( db_path ) exomiserdb . import_from_semsim_file ( input_path , object_prefix , subject_prefix )","title":"Exomiser"},{"location":"api/pheval/utils/exomiser/#src.pheval.utils.exomiser.semsim_to_exomiserdb","text":"ingests semsim file into exomiser phenotypic database Parameters: Name Type Description Default input_path Path semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv required object_prefix str object prefix. e.g. MP required subject_prefix str subject prefix e.g HP required db_path Path Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/) required Source code in src/pheval/utils/exomiser.py 6 7 8 9 10 11 12 13 14 15 16 def semsim_to_exomiserdb ( input_path : Path , object_prefix : str , subject_prefix : str , db_path : Path ): \"\"\"ingests semsim file into exomiser phenotypic database Args: input_path (Path): semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv object_prefix (str): object prefix. e.g. MP subject_prefix (str): subject prefix e.g HP db_path (Path): Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/) \"\"\" exomiserdb = ExomiserDB ( db_path ) exomiserdb . 
import_from_semsim_file ( input_path , object_prefix , subject_prefix )","title":"semsim_to_exomiserdb()"},{"location":"api/pheval/utils/file_utils/","text":"all_files ( directory ) Obtains all files from a given directory. Parameters: Name Type Description Default directory Path The directory path. required Returns: Type Description list [ Path ] list[Path]: A list of Path objects representing all files in the directory. Source code in src/pheval/utils/file_utils.py 31 32 33 34 35 36 37 38 39 40 41 42 43 def all_files ( directory : Path ) -> list [ Path ]: \"\"\" Obtains all files from a given directory. Args: directory (Path): The directory path. Returns: list[Path]: A list of Path objects representing all files in the directory. \"\"\" files = [ file_path for file_path in directory . iterdir ()] files . sort () return files ensure_columns_exists ( cols , dataframes , err_message = '' ) Ensures the columns exist in dataframes passed as argument (e.g) \" ensure_columns_exists( cols=['column_a', 'column_b, 'column_c'], err_message=\"Custom error message if any column doesn't exist in any dataframe passed as argument\", dataframes=[data_frame1, data_frame2], ) \" Source code in src/pheval/utils/file_utils.py 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 def ensure_columns_exists ( cols : list , dataframes : List [ pd . DataFrame ], err_message : str = \"\" ): \"\"\"Ensures the columns exist in dataframes passed as argument (e.g) \" ensure_columns_exists( cols=['column_a', 'column_b, 'column_c'], err_message=\"Custom error message if any column doesn't exist in any dataframe passed as argument\", dataframes=[data_frame1, data_frame2], ) \" \"\"\" flat_cols = list ( itertools . chain ( cols )) if not dataframes or not flat_cols : return if err_message : err_msg = f \"\"\"columns: { \", \" . join ( flat_cols [: - 1 ]) } and { flat_cols [ - 1 ] } { err_message } \"\"\" else : err_msg = f \"\"\"columns: { \", \" . 
join ( flat_cols [: - 1 ]) } and { flat_cols [ - 1 ] } \\ - must be present in both left and right files\"\"\" for dataframe in dataframes : if not all ( x in dataframe . columns for x in flat_cols ): raise ValueError ( err_msg ) ensure_file_exists ( * files ) Ensures the existence of files passed as parameter Raises: Type Description FileNotFoundError If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised Source code in src/pheval/utils/file_utils.py 73 74 75 76 77 78 79 80 def ensure_file_exists ( * files : str ): \"\"\"Ensures the existence of files passed as parameter Raises: FileNotFoundError: If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised \"\"\" for file in files : if not path . isfile ( file ): raise FileNotFoundError ( f \"File { file } not found\" ) files_with_suffix ( directory , suffix ) Obtains all files ending in a specified suffix from a given directory. Parameters: Name Type Description Default directory Path The directory path. required suffix str The specified suffix to filter files. required Returns: Type Description list [ Path ] list[Path]: A list of Path objects representing files with the specified suffix. Source code in src/pheval/utils/file_utils.py 15 16 17 18 19 20 21 22 23 24 25 26 27 28 def files_with_suffix ( directory : Path , suffix : str ) -> list [ Path ]: \"\"\" Obtains all files ending in a specified suffix from a given directory. Args: directory (Path): The directory path. suffix (str): The specified suffix to filter files. Returns: list[Path]: A list of Path objects representing files with the specified suffix. \"\"\" files = [ file_path for file_path in directory . iterdir () if file_path . suffix == suffix ] files . sort () return files is_gzipped ( file_path ) Confirms whether a file is gzipped. Parameters: Name Type Description Default file_path Path The path to the file. 
required Returns: Name Type Description bool bool True if the file is gzipped, False otherwise. Source code in src/pheval/utils/file_utils.py 46 47 48 49 50 51 52 53 54 55 56 def is_gzipped ( file_path : Path ) -> bool : \"\"\" Confirms whether a file is gzipped. Args: file_path (Path): The path to the file. Returns: bool: True if the file is gzipped, False otherwise. \"\"\" return file_path . name . endswith ( \".gz\" ) normalise_file_name ( file_path ) Normalises the file name by removing diacritical marks (accents) from Unicode characters. Parameters: Name Type Description Default file_path Path The path to the file. required Returns: Name Type Description str str The normalised file name without diacritical marks. Source code in src/pheval/utils/file_utils.py 59 60 61 62 63 64 65 66 67 68 69 70 def normalise_file_name ( file_path : Path ) -> str : \"\"\" Normalises the file name by removing diacritical marks (accents) from Unicode characters. Args: file_path (Path): The path to the file. Returns: str: The normalised file name without diacritical marks. \"\"\" normalised_file_name = unicodedata . normalize ( \"NFD\" , str ( file_path )) return re . sub ( \"[ \\u0300 - \\u036f ]\" , \"\" , normalised_file_name ) write_metadata ( output_dir , meta_data ) Write the metadata for a run to a YAML file. Parameters: Name Type Description Default output_dir Path The directory where the metadata file will be saved. required meta_data BasicOutputRunMetaData The metadata to be written. required Source code in src/pheval/utils/file_utils.py 108 109 110 111 112 113 114 115 116 117 118 def write_metadata ( output_dir : Path , meta_data : BasicOutputRunMetaData ) -> None : \"\"\" Write the metadata for a run to a YAML file. Args: output_dir (Path): The directory where the metadata file will be saved. meta_data (BasicOutputRunMetaData): The metadata to be written. \"\"\" with open ( Path ( output_dir ) . joinpath ( \"results.yml\" ), \"w\" ) as metadata_file : yaml . 
dump ( to_dict ( meta_data ), metadata_file , sort_keys = False , default_style = \"\" ) metadata_file . close ()","title":"File utils"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.all_files","text":"Obtains all files from a given directory. Parameters: Name Type Description Default directory Path The directory path. required Returns: Type Description list [ Path ] list[Path]: A list of Path objects representing all files in the directory. Source code in src/pheval/utils/file_utils.py 31 32 33 34 35 36 37 38 39 40 41 42 43 def all_files ( directory : Path ) -> list [ Path ]: \"\"\" Obtains all files from a given directory. Args: directory (Path): The directory path. Returns: list[Path]: A list of Path objects representing all files in the directory. \"\"\" files = [ file_path for file_path in directory . iterdir ()] files . sort () return files","title":"all_files()"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.ensure_columns_exists","text":"Ensures the columns exist in dataframes passed as argument (e.g) \" ensure_columns_exists( cols=['column_a', 'column_b, 'column_c'], err_message=\"Custom error message if any column doesn't exist in any dataframe passed as argument\", dataframes=[data_frame1, data_frame2], ) \" Source code in src/pheval/utils/file_utils.py 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 def ensure_columns_exists ( cols : list , dataframes : List [ pd . DataFrame ], err_message : str = \"\" ): \"\"\"Ensures the columns exist in dataframes passed as argument (e.g) \" ensure_columns_exists( cols=['column_a', 'column_b, 'column_c'], err_message=\"Custom error message if any column doesn't exist in any dataframe passed as argument\", dataframes=[data_frame1, data_frame2], ) \" \"\"\" flat_cols = list ( itertools . chain ( cols )) if not dataframes or not flat_cols : return if err_message : err_msg = f \"\"\"columns: { \", \" . 
join ( flat_cols [: - 1 ]) } and { flat_cols [ - 1 ] } { err_message } \"\"\" else : err_msg = f \"\"\"columns: { \", \" . join ( flat_cols [: - 1 ]) } and { flat_cols [ - 1 ] } \\ - must be present in both left and right files\"\"\" for dataframe in dataframes : if not all ( x in dataframe . columns for x in flat_cols ): raise ValueError ( err_msg )","title":"ensure_columns_exists()"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.ensure_file_exists","text":"Ensures the existence of files passed as parameter Raises: Type Description FileNotFoundError If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised Source code in src/pheval/utils/file_utils.py 73 74 75 76 77 78 79 80 def ensure_file_exists ( * files : str ): \"\"\"Ensures the existence of files passed as parameter Raises: FileNotFoundError: If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised \"\"\" for file in files : if not path . isfile ( file ): raise FileNotFoundError ( f \"File { file } not found\" )","title":"ensure_file_exists()"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.files_with_suffix","text":"Obtains all files ending in a specified suffix from a given directory. Parameters: Name Type Description Default directory Path The directory path. required suffix str The specified suffix to filter files. required Returns: Type Description list [ Path ] list[Path]: A list of Path objects representing files with the specified suffix. Source code in src/pheval/utils/file_utils.py 15 16 17 18 19 20 21 22 23 24 25 26 27 28 def files_with_suffix ( directory : Path , suffix : str ) -> list [ Path ]: \"\"\" Obtains all files ending in a specified suffix from a given directory. Args: directory (Path): The directory path. suffix (str): The specified suffix to filter files. Returns: list[Path]: A list of Path objects representing files with the specified suffix. 
\"\"\" files = [ file_path for file_path in directory . iterdir () if file_path . suffix == suffix ] files . sort () return files","title":"files_with_suffix()"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.is_gzipped","text":"Confirms whether a file is gzipped. Parameters: Name Type Description Default file_path Path The path to the file. required Returns: Name Type Description bool bool True if the file is gzipped, False otherwise. Source code in src/pheval/utils/file_utils.py 46 47 48 49 50 51 52 53 54 55 56 def is_gzipped ( file_path : Path ) -> bool : \"\"\" Confirms whether a file is gzipped. Args: file_path (Path): The path to the file. Returns: bool: True if the file is gzipped, False otherwise. \"\"\" return file_path . name . endswith ( \".gz\" )","title":"is_gzipped()"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.normalise_file_name","text":"Normalises the file name by removing diacritical marks (accents) from Unicode characters. Parameters: Name Type Description Default file_path Path The path to the file. required Returns: Name Type Description str str The normalised file name without diacritical marks. Source code in src/pheval/utils/file_utils.py 59 60 61 62 63 64 65 66 67 68 69 70 def normalise_file_name ( file_path : Path ) -> str : \"\"\" Normalises the file name by removing diacritical marks (accents) from Unicode characters. Args: file_path (Path): The path to the file. Returns: str: The normalised file name without diacritical marks. \"\"\" normalised_file_name = unicodedata . normalize ( \"NFD\" , str ( file_path )) return re . sub ( \"[ \\u0300 - \\u036f ]\" , \"\" , normalised_file_name )","title":"normalise_file_name()"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.write_metadata","text":"Write the metadata for a run to a YAML file. Parameters: Name Type Description Default output_dir Path The directory where the metadata file will be saved. 
required meta_data BasicOutputRunMetaData The metadata to be written. required Source code in src/pheval/utils/file_utils.py 108 109 110 111 112 113 114 115 116 117 118 def write_metadata ( output_dir : Path , meta_data : BasicOutputRunMetaData ) -> None : \"\"\" Write the metadata for a run to a YAML file. Args: output_dir (Path): The directory where the metadata file will be saved. meta_data (BasicOutputRunMetaData): The metadata to be written. \"\"\" with open ( Path ( output_dir ) . joinpath ( \"results.yml\" ), \"w\" ) as metadata_file : yaml . dump ( to_dict ( meta_data ), metadata_file , sort_keys = False , default_style = \"\" ) metadata_file . close ()","title":"write_metadata()"},{"location":"api/pheval/utils/phenopacket_utils/","text":"GeneIdentifierUpdater Class for updating gene identifiers within genomic interpretations. Source code in src/pheval/utils/phenopacket_utils.py 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 class GeneIdentifierUpdater : \"\"\"Class for updating gene identifiers within genomic interpretations.\"\"\" def __init__ ( self , gene_identifier : str , hgnc_data : dict = None , identifier_map : dict = None ): \"\"\" Initialise the GeneIdentifierUpdater. Args: gene_identifier (str): The gene identifier to update to. hgnc_data (dict): A dictionary containing HGNC data (default: None). identifier_map (dict): A dictionary mapping gene identifiers (default: None). \"\"\" self . hgnc_data = hgnc_data self . gene_identifier = gene_identifier self . 
identifier_map = identifier_map def find_identifier ( self , gene_symbol : str ) -> str : \"\"\" Find the specified gene identifier for a gene symbol. Args: gene_symbol (str): The gene symbol to find the identifier for. Returns: str: The identified gene identifier. \"\"\" if gene_symbol in self . hgnc_data . keys (): return self . hgnc_data [ gene_symbol ][ self . gene_identifier ] else : for _symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return data [ self . gene_identifier ] def obtain_gene_symbol_from_identifier ( self , query_gene_identifier : str ) -> str : \"\"\" Obtain gene symbol from a gene identifier. Args: query_gene_identifier (str): The gene identifier. Returns: str: The gene symbol corresponding to the identifier. \"\"\" return self . identifier_map [ query_gene_identifier ] def _find_alternate_ids ( self , gene_symbol : str ) -> List [ str ]: \"\"\" Find the alternate IDs for a gene symbol. Args: gene_symbol (str): The gene symbol to find alternate IDs for. Returns: List[str]: List of alternate IDs for the gene symbol. \"\"\" if gene_symbol in self . hgnc_data . keys (): return [ self . hgnc_data [ gene_symbol ][ \"hgnc_id\" ], \"ncbigene:\" + self . hgnc_data [ gene_symbol ][ \"entrez_id\" ], \"ensembl:\" + self . hgnc_data [ gene_symbol ][ \"ensembl_id\" ], \"symbol:\" + gene_symbol , ] else : for symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return [ data [ \"hgnc_id\" ], \"ncbigene:\" + data [ \"entrez_id\" ], \"ensembl:\" + data [ \"ensembl_id\" ], \"symbol:\" + symbol , ] def update_genomic_interpretations_gene_identifier ( self , interpretations : List [ Interpretation ], phenopacket_path : Path ) -> List [ Interpretation ]: \"\"\" Update the genomic interpretations of a Phenopacket. Args: interpretations (List[Interpretation]): List of Interpretation objects. 
Returns: List[Interpretation]: Updated list of Interpretation objects. \"\"\" updated_interpretations = copy ( list ( interpretations )) for updated_interpretation in updated_interpretations : for g in updated_interpretation . diagnosis . genomic_interpretations : updated_gene_identifier = self . find_identifier ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) info_log . info ( f \"Updating gene identifier in { phenopacket_path } from \" f \" { g . variant_interpretation . variation_descriptor . gene_context . value_id } \" f \"to { updated_gene_identifier } \" ) g . variant_interpretation . variation_descriptor . gene_context . value_id = ( updated_gene_identifier ) del g . variant_interpretation . variation_descriptor . gene_context . alternate_ids [:] g . variant_interpretation . variation_descriptor . gene_context . alternate_ids . extend ( self . _find_alternate_ids ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) ) return updated_interpretations __init__ ( gene_identifier , hgnc_data = None , identifier_map = None ) Initialise the GeneIdentifierUpdater. Parameters: Name Type Description Default gene_identifier str The gene identifier to update to. required hgnc_data dict A dictionary containing HGNC data (default: None). None identifier_map dict A dictionary mapping gene identifiers (default: None). None Source code in src/pheval/utils/phenopacket_utils.py 641 642 643 644 645 646 647 648 649 650 651 652 653 def __init__ ( self , gene_identifier : str , hgnc_data : dict = None , identifier_map : dict = None ): \"\"\" Initialise the GeneIdentifierUpdater. Args: gene_identifier (str): The gene identifier to update to. hgnc_data (dict): A dictionary containing HGNC data (default: None). identifier_map (dict): A dictionary mapping gene identifiers (default: None). \"\"\" self . hgnc_data = hgnc_data self . gene_identifier = gene_identifier self . 
identifier_map = identifier_map find_identifier ( gene_symbol ) Find the specified gene identifier for a gene symbol. Parameters: Name Type Description Default gene_symbol str The gene symbol to find the identifier for. required Returns: Name Type Description str str The identified gene identifier. Source code in src/pheval/utils/phenopacket_utils.py 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 def find_identifier ( self , gene_symbol : str ) -> str : \"\"\" Find the specified gene identifier for a gene symbol. Args: gene_symbol (str): The gene symbol to find the identifier for. Returns: str: The identified gene identifier. \"\"\" if gene_symbol in self . hgnc_data . keys (): return self . hgnc_data [ gene_symbol ][ self . gene_identifier ] else : for _symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return data [ self . gene_identifier ] obtain_gene_symbol_from_identifier ( query_gene_identifier ) Obtain gene symbol from a gene identifier. Parameters: Name Type Description Default query_gene_identifier str The gene identifier. required Returns: Name Type Description str str The gene symbol corresponding to the identifier. Source code in src/pheval/utils/phenopacket_utils.py 673 674 675 676 677 678 679 680 681 682 683 def obtain_gene_symbol_from_identifier ( self , query_gene_identifier : str ) -> str : \"\"\" Obtain gene symbol from a gene identifier. Args: query_gene_identifier (str): The gene identifier. Returns: str: The gene symbol corresponding to the identifier. \"\"\" return self . identifier_map [ query_gene_identifier ] update_genomic_interpretations_gene_identifier ( interpretations , phenopacket_path ) Update the genomic interpretations of a Phenopacket. Parameters: Name Type Description Default interpretations List [ Interpretation ] List of Interpretation objects. 
required Returns: Type Description List [ Interpretation ] List[Interpretation]: Updated list of Interpretation objects. Source code in src/pheval/utils/phenopacket_utils.py 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 def update_genomic_interpretations_gene_identifier ( self , interpretations : List [ Interpretation ], phenopacket_path : Path ) -> List [ Interpretation ]: \"\"\" Update the genomic interpretations of a Phenopacket. Args: interpretations (List[Interpretation]): List of Interpretation objects. Returns: List[Interpretation]: Updated list of Interpretation objects. \"\"\" updated_interpretations = copy ( list ( interpretations )) for updated_interpretation in updated_interpretations : for g in updated_interpretation . diagnosis . genomic_interpretations : updated_gene_identifier = self . find_identifier ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) info_log . info ( f \"Updating gene identifier in { phenopacket_path } from \" f \" { g . variant_interpretation . variation_descriptor . gene_context . value_id } \" f \"to { updated_gene_identifier } \" ) g . variant_interpretation . variation_descriptor . gene_context . value_id = ( updated_gene_identifier ) del g . variant_interpretation . variation_descriptor . gene_context . alternate_ids [:] g . variant_interpretation . variation_descriptor . gene_context . alternate_ids . extend ( self . _find_alternate_ids ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) ) return updated_interpretations GenomicVariant dataclass Represents a genomic variant. Parameters: Name Type Description Default chrom str The chromosome position of the variant recommended to be provided in the following format. required pos int Position of the variant following VCF convention. required ref str Reference allele following VCF convention. 
required alt str Alternate allele following VCF convention. required Source code in src/pheval/utils/phenopacket_utils.py 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 @dataclass class GenomicVariant : \"\"\" Represents a genomic variant. Args: chrom (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. pos (int): Position of the variant following VCF convention. ref (str): Reference allele following VCF convention. alt (str): Alternate allele following VCF convention. \"\"\" chrom : str pos : int ref : str alt : str IncompatibleGenomeAssemblyError Bases: Exception Exception raised for incompatible genome assembly. Source code in src/pheval/utils/phenopacket_utils.py 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 class IncompatibleGenomeAssemblyError ( Exception ): \"\"\"Exception raised for incompatible genome assembly.\"\"\" def __init__ ( self , assembly , phenopacket , message = \"Incompatible Genome Assembly\" ): \"\"\" Initialise IncompatibleGenomeAssemblyError. Attributes: assembly (str): Incompatible genome assembly encountered. phenopacket (Path): Path to the Phenopacket associated with the error. message (str, optional): Custom error message (default is \"Incompatible Genome Assembly\"). \"\"\" self . assembly : str = assembly self . phenopacket : Path = phenopacket self . message : str = message super () . __init__ ( self . message ) def __str__ ( self ): return f \" { self . message } -> { self . assembly } in { self . phenopacket } \" __init__ ( assembly , phenopacket , message = 'Incompatible Genome Assembly' ) Initialise IncompatibleGenomeAssemblyError. Attributes: Name Type Description assembly str Incompatible genome assembly encountered. phenopacket Path Path to the Phenopacket associated with the error. 
message str Custom error message (default is \"Incompatible Genome Assembly\"). Source code in src/pheval/utils/phenopacket_utils.py 30 31 32 33 34 35 36 37 38 39 40 41 42 def __init__ ( self , assembly , phenopacket , message = \"Incompatible Genome Assembly\" ): \"\"\" Initialise IncompatibleGenomeAssemblyError. Attributes: assembly (str): Incompatible genome assembly encountered. phenopacket (Path): Path to the Phenopacket associated with the error. message (str, optional): Custom error message (default is \"Incompatible Genome Assembly\"). \"\"\" self . assembly : str = assembly self . phenopacket : Path = phenopacket self . message : str = message super () . __init__ ( self . message ) PhenopacketRebuilder Class for rebuilding a Phenopacket Source code in src/pheval/utils/phenopacket_utils.py 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 class PhenopacketRebuilder : \"\"\"Class for rebuilding a Phenopacket\"\"\" def __init__ ( self , phenopacket : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Attributes: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket = phenopacket def update_interpretations ( self , interpretations : [ Interpretation ] ) -> Union [ Phenopacket , Family ]: \"\"\" Add the updated interpretations to a Phenopacket or Family. Args: interpretations (List[Interpretation]): The updated interpretations to be added. Returns: Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . interpretations [:] phenopacket . proband . interpretations . extend ( interpretations ) else : del phenopacket . 
interpretations [:] phenopacket . interpretations . extend ( interpretations ) return phenopacket def add_randomised_hpo ( self , randomised_hpo : [ PhenotypicFeature ]) -> Union [ Phenopacket , Family ]: \"\"\" Add randomised phenotypic profiles to a Phenopacket or Family. Args: randomised_hpo: The randomised phenotypic profiles to be added. Returns: Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . phenotypic_features [:] phenopacket . proband . phenotypic_features . extend ( randomised_hpo ) else : del phenopacket . phenotypic_features [:] phenopacket . phenotypic_features . extend ( randomised_hpo ) return phenopacket def add_spiked_vcf_path ( self , spiked_vcf_file_data : File ) -> Union [ Phenopacket , Family ]: \"\"\" Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. \"\"\" phenopacket = copy ( self . phenopacket ) phenopacket_files = [ file for file in phenopacket . files if file . file_attributes [ \"fileFormat\" ] != \"vcf\" ] phenopacket_files . append ( spiked_vcf_file_data ) del phenopacket . files [:] phenopacket . files . extend ( phenopacket_files ) return phenopacket __init__ ( phenopacket ) Initialise PhenopacketUtil Attributes: Name Type Description phenopacket Union [ Phenopacket , Family ] Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 540 541 542 543 544 545 546 def __init__ ( self , phenopacket : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Attributes: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket = phenopacket add_randomised_hpo ( randomised_hpo ) Add randomised phenotypic profiles to a Phenopacket or Family. 
Parameters: Name Type Description Default randomised_hpo [ PhenotypicFeature ] The randomised phenotypic profiles to be added. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. Source code in src/pheval/utils/phenopacket_utils.py 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 def add_randomised_hpo ( self , randomised_hpo : [ PhenotypicFeature ]) -> Union [ Phenopacket , Family ]: \"\"\" Add randomised phenotypic profiles to a Phenopacket or Family. Args: randomised_hpo: The randomised phenotypic profiles to be added. Returns: Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . phenotypic_features [:] phenopacket . proband . phenotypic_features . extend ( randomised_hpo ) else : del phenopacket . phenotypic_features [:] phenopacket . phenotypic_features . extend ( randomised_hpo ) return phenopacket add_spiked_vcf_path ( spiked_vcf_file_data ) Add a spiked VCF path to a Phenopacket or Family. spiked_vcf_file_data (File): The VCF file data to be added. Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. Source code in src/pheval/utils/phenopacket_utils.py 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 def add_spiked_vcf_path ( self , spiked_vcf_file_data : File ) -> Union [ Phenopacket , Family ]: \"\"\" Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. \"\"\" phenopacket = copy ( self . phenopacket ) phenopacket_files = [ file for file in phenopacket . files if file . file_attributes [ \"fileFormat\" ] != \"vcf\" ] phenopacket_files . 
append ( spiked_vcf_file_data ) del phenopacket . files [:] phenopacket . files . extend ( phenopacket_files ) return phenopacket update_interpretations ( interpretations ) Add the updated interpretations to a Phenopacket or Family. Parameters: Name Type Description Default interpretations List [ Interpretation ] The updated interpretations to be added. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. Source code in src/pheval/utils/phenopacket_utils.py 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 def update_interpretations ( self , interpretations : [ Interpretation ] ) -> Union [ Phenopacket , Family ]: \"\"\" Add the updated interpretations to a Phenopacket or Family. Args: interpretations (List[Interpretation]): The updated interpretations to be added. Returns: Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . interpretations [:] phenopacket . proband . interpretations . extend ( interpretations ) else : del phenopacket . interpretations [:] phenopacket . interpretations . 
extend ( interpretations ) return phenopacket PhenopacketUtil Class for retrieving data from a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 class PhenopacketUtil : \"\"\"Class for retrieving data from a Phenopacket or Family object\"\"\" def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . 
phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . diseases def _diagnosis_from_interpretations ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the interpretations object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] interpretation = self . interpretations () for i in interpretation : ( diagnoses . 
append ( ProbandDisease ( disease_name = i . diagnosis . disease . label , disease_identifier = i . diagnosis . disease . id , ) ) if i . diagnosis . disease . label != \"\" and i . diagnosis . disease . id != \"\" else None ) return diagnoses def _diagnosis_from_disease ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the diseases object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] for disease in self . diseases (): diagnoses . append ( ProbandDisease ( disease_name = disease . term . label , disease_identifier = disease . term . id ) ) return diagnoses def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ())) def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . interpretations def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . 
chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . 
name )) return vcf_data @staticmethod def _extract_diagnosed_gene ( genomic_interpretation : GenomicInterpretation , ) -> ProbandCausativeGene : \"\"\" Retrieve the disease causing genes from the variant descriptor field if not empty, otherwise, retrieves from the gene descriptor from a phenopacket. Args: genomic_interpretation (GenomicInterpretation): A genomic interpretation from a Phenopacket Returns: ProbandCausativeGene: The disease causing gene \"\"\" if genomic_interpretation . variant_interpretation . ByteSize () != 0 : return ProbandCausativeGene ( genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . symbol , genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . value_id , ) else : return ProbandCausativeGene ( gene_symbol = genomic_interpretation . gene . symbol , gene_identifier = genomic_interpretation . gene . value_id , ) def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . 
variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . 
diagnoses ()) == 0 : return True return False __init__ ( phenopacket_contents ) Initialise PhenopacketUtil Parameters: Name Type Description Default phenopacket_contents Union [ Phenopacket , Family ] Phenopacket or Family object required Source code in src/pheval/utils/phenopacket_utils.py 222 223 224 225 226 227 228 def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents causative_variants () Retrieve a list of causative variants listed in a Phenopacket Returns: Type Description List [ ProbandCausativeVariant ] List[ProbandCausativeVariant]: List of proband causative variants Source code in src/pheval/utils/phenopacket_utils.py 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants check_incomplete_disease_record () Check if any disease record in the phenopacket has incomplete information. 
This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: Name Type Description bool bool True if any disease record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 522 523 524 525 526 527 528 529 530 531 532 533 534 def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False check_incomplete_gene_record () Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: Name Type Description bool bool True if any gene record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False check_incomplete_variant_record () Check if any variant record in the phenopacket has incomplete information. 
This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: Name Type Description bool bool True if any variant record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False diagnosed_genes () Retrieve the disease causing genes from a phenopacket. Returns: Type Description List [ ProbandCausativeGene ] List[ProbandCausativeGene]: List of causative genes Source code in src/pheval/utils/phenopacket_utils.py 446 447 448 449 450 451 452 453 454 455 456 457 458 def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes diagnosed_variants () Retrieve a list of all known causative variants from a phenopacket. 
Returns: Type Description List [ GenomicVariant ] List[GenomicVariant]: List of causative variants Source code in src/pheval/utils/phenopacket_utils.py 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants diagnoses () Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: Type Description List [ ProbandDisease ] List[ProbandDisease]: List of diagnosed diseases Source code in src/pheval/utils/phenopacket_utils.py 331 332 333 334 335 336 337 338 def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . 
_diagnosis_from_disease ())) diseases () Retrieve a list of Diseases associated with the proband Returns: Type Description List [ Disease ] List[Disease]: List of diseases Source code in src/pheval/utils/phenopacket_utils.py 283 284 285 286 287 288 289 290 291 292 293 def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . diseases files () Retrieve a list of files associated with a phenopacket Returns: Type Description List [ File ] List[File]: List of files associated with a phenopacket Source code in src/pheval/utils/phenopacket_utils.py 380 381 382 383 384 385 386 387 def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files interpretations () Retrieve a list of interpretations from a Phenopacket Returns: Type Description List [ Interpretation ] List[Interpretation]: List of interpretations Source code in src/pheval/utils/phenopacket_utils.py 340 341 342 343 344 345 346 347 348 349 350 def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . 
interpretations negated_phenotypic_features () Retrieve a list of all negated HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of negated HPO terms Source code in src/pheval/utils/phenopacket_utils.py 269 270 271 272 273 274 275 276 277 278 279 280 281 def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features observed_phenotypic_features () Retrieve a list of all observed HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of observed HPO terms Source code in src/pheval/utils/phenopacket_utils.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features phenotypic_features () Retrieve a list of all HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of HPO terms Source code in src/pheval/utils/phenopacket_utils.py 242 243 244 245 246 247 248 249 250 251 252 def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . 
phenotypic_features sample_id () Retrieve the sample ID from a Phenopacket or proband of a Family Returns: Name Type Description str str Sample ID Source code in src/pheval/utils/phenopacket_utils.py 230 231 232 233 234 235 236 237 238 239 240 def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id vcf_file_data ( phenopacket_path , vcf_dir ) Retrieve the genome assembly and VCF file name from a phenopacket. Parameters: Name Type Description Default phenopacket_path Path The path to the phenopacket file. required vcf_dir Path The directory path where the VCF file is stored. required Returns: Name Type Description File File The VCF file with updated URI pointing to the specified directory. Raises: Type Description IncorrectFileFormatError If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError If the genome assembly of the VCF file is not compatible. Note This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. Source code in src/pheval/utils/phenopacket_utils.py 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. 
Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . name )) return vcf_data ProbandCausativeGene dataclass Represents a causative gene associated with a proband Parameters: Name Type Description Default gene_symbol str Symbol representing the gene required gene_identifier str The ENSEMBL gene identifier for the result entry required Notes While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/utils/phenopacket_utils.py 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 @dataclass class ProbandCausativeGene : \"\"\" Represents a causative gene associated with a proband Args: gene_symbol (str): Symbol representing the gene gene_identifier (str): The ENSEMBL gene identifier for the result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. \"\"\" gene_symbol : str gene_identifier : str ProbandCausativeVariant dataclass Represents a causative variant associated with a proband Parameters: Name Type Description Default proband_id str ID of the proband required assembly str Genome assembly required variant GenomicVariant Genomic variant associated with the proband required genotype str Genotype information for the variant required info str Additional information about the variant (default is an empty string) '' Source code in src/pheval/utils/phenopacket_utils.py 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 @dataclass class ProbandCausativeVariant : \"\"\" Represents a causative variant associated with a proband Args: proband_id (str): ID of the proband assembly (str): Genome assembly variant (GenomicVariant): Genomic variant associated with the proband genotype (str): Genotype information for the variant info (str, optional): Additional information about the variant (default is an empty string) \"\"\" proband_id : str assembly : str variant : GenomicVariant genotype : str info : str = \"\" ProbandDisease dataclass Represents a disease associated with a proband Parameters: Name Type Description Default disease_name str Name of the disease required disease_identifier str Identifier for the disease result entry in the OMIM namespace required Notes While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket 
interpretations and result output is acceptable for result matching purposes in the analysis. Source code in src/pheval/utils/phenopacket_utils.py 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 @dataclass ( frozen = True , eq = True ) class ProbandDisease : \"\"\" Represents a disease associated with a proband Args: disease_name (str): Name of the disease disease_identifier (str): Identifier for the disease result entry in the OMIM namespace Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str create_gene_identifier_map () Create a mapping of gene identifiers to gene symbols using HGNC data. Returns: Name Type Description dict dict A mapping of gene identifiers to gene symbols. Notes The dictionary structure: { 'identifier': 'gene_symbol', ... } Source code in src/pheval/utils/phenopacket_utils.py 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 def create_gene_identifier_map () -> dict : \"\"\" Create a mapping of gene identifiers to gene symbols using HGNC data. Returns: dict: A mapping of gene identifiers to gene symbols. Notes: The dictionary structure: { 'identifier': 'gene_symbol', ... } \"\"\" hgnc_df = read_hgnc_data () identifier_map = {} for _index , row in hgnc_df . iterrows (): identifier_map [ row [ \"ensembl_gene_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"hgnc_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"entrez_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"refseq_accession\" ]] = row [ \"symbol\" ] return identifier_map create_hgnc_dict () Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data. 
Returns: Name Type Description defaultdict defaultdict A dictionary containing gene symbols as keys and their associated gene information. Notes The dictionary structure: { 'gene_symbol': { 'ensembl_id': str, 'hgnc_id': str, 'entrez_id': str, 'refseq_accession': str, 'previous_symbol': [str, ...] }, ... } Source code in src/pheval/utils/phenopacket_utils.py 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 def create_hgnc_dict () -> defaultdict : \"\"\" Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data. Returns: defaultdict: A dictionary containing gene symbols as keys and their associated gene information. Notes: The dictionary structure: { 'gene_symbol': { 'ensembl_id': str, 'hgnc_id': str, 'entrez_id': str, 'refseq_accession': str, 'previous_symbol': [str, ...] }, ... } \"\"\" hgnc_df = read_hgnc_data () hgnc_data = defaultdict ( dict ) for _index , row in hgnc_df . iterrows (): previous_names = [] hgnc_data [ row [ \"symbol\" ]][ \"ensembl_id\" ] = row [ \"ensembl_gene_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"hgnc_id\" ] = row [ \"hgnc_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"entrez_id\" ] = row [ \"entrez_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"refseq_accession\" ] = row [ \"refseq_accession\" ] previous = str ( row [ \"prev_symbol\" ]) . split ( \"|\" ) for p in previous : previous_names . append ( p . strip ( '\"' )) hgnc_data [ row [ \"symbol\" ]][ \"previous_symbol\" ] = previous_names return hgnc_data create_json_message ( phenopacket ) Create a JSON message for writing to a file. phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON. str: A JSON-formatted string representation of the Phenopacket or Family object. 
Source code in src/pheval/utils/phenopacket_utils.py 608 609 610 611 612 613 614 615 616 617 618 def create_json_message ( phenopacket : Union [ Phenopacket , Family ]) -> str : \"\"\" Create a JSON message for writing to a file. Args: - phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON. Returns: - str: A JSON-formatted string representation of the Phenopacket or Family object. \"\"\" return MessageToJson ( phenopacket ) phenopacket_reader ( file ) Read a Phenopacket file and returns its contents as a Phenopacket or Family object Parameters: Name Type Description Default file Path Path to the Phenopacket file required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 def phenopacket_reader ( file : Path ) -> Union [ Phenopacket , Family ]: \"\"\" Read a Phenopacket file and returns its contents as a Phenopacket or Family object Args: file (Path): Path to the Phenopacket file Returns: Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object \"\"\" file = open ( file , \"r\" ) phenopacket = json . load ( file ) file . close () if \"proband\" in phenopacket : return Parse ( json . dumps ( phenopacket ), Family ()) else : return Parse ( json . dumps ( phenopacket ), Phenopacket ()) read_hgnc_data () Read HGNC data from a file and return it as a Pandas DataFrame. Returns: Type Description pd . DataFrame pd.DataFrame: DataFrame containing the HGNC data. Source code in src/pheval/utils/phenopacket_utils.py 125 126 127 128 129 130 131 132 133 134 135 136 def read_hgnc_data () -> pd . DataFrame : \"\"\" Read HGNC data from a file and return it as a Pandas DataFrame. Returns: pd.DataFrame: DataFrame containing the HGNC data. \"\"\" return pd . read_csv ( os . path . 
dirname ( __file__ ) . replace ( \"utils\" , \"resources/hgnc_complete_set.txt\" ), delimiter = \" \\t \" , dtype = str , ) write_phenopacket ( phenopacket , output_file ) Write a Phenopacket or Family object to a file in JSON format. Parameters: Name Type Description Default phenopacket Phenopacket or Family The Phenopacket or Family object to be written. required output_file Path The Path object representing the file to write the Phenopacket data. required Returns: Type Description None None Source code in src/pheval/utils/phenopacket_utils.py 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 def write_phenopacket ( phenopacket : Union [ Phenopacket , Family ], output_file : Path ) -> None : \"\"\" Write a Phenopacket or Family object to a file in JSON format. Args: phenopacket (Phenopacket or Family): The Phenopacket or Family object to be written. output_file (Path): The Path object representing the file to write the Phenopacket data. Returns: None \"\"\" phenopacket_json = create_json_message ( phenopacket ) with open ( output_file , \"w\" ) as outfile : outfile . write ( phenopacket_json ) outfile . close ()","title":"Phenopacket utils"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater","text":"Class for updating gene identifiers within genomic interpretations. 
Source code in src/pheval/utils/phenopacket_utils.py 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 class GeneIdentifierUpdater : \"\"\"Class for updating gene identifiers within genomic interpretations.\"\"\" def __init__ ( self , gene_identifier : str , hgnc_data : dict = None , identifier_map : dict = None ): \"\"\" Initialise the GeneIdentifierUpdater. Args: gene_identifier (str): The gene identifier to update to. hgnc_data (dict): A dictionary containing HGNC data (default: None). identifier_map (dict): A dictionary mapping gene identifiers (default: None). \"\"\" self . hgnc_data = hgnc_data self . gene_identifier = gene_identifier self . identifier_map = identifier_map def find_identifier ( self , gene_symbol : str ) -> str : \"\"\" Find the specified gene identifier for a gene symbol. Args: gene_symbol (str): The gene symbol to find the identifier for. Returns: str: The identified gene identifier. \"\"\" if gene_symbol in self . hgnc_data . keys (): return self . hgnc_data [ gene_symbol ][ self . gene_identifier ] else : for _symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return data [ self . gene_identifier ] def obtain_gene_symbol_from_identifier ( self , query_gene_identifier : str ) -> str : \"\"\" Obtain gene symbol from a gene identifier. Args: query_gene_identifier (str): The gene identifier. Returns: str: The gene symbol corresponding to the identifier. \"\"\" return self . 
identifier_map [ query_gene_identifier ] def _find_alternate_ids ( self , gene_symbol : str ) -> List [ str ]: \"\"\" Find the alternate IDs for a gene symbol. Args: gene_symbol (str): The gene symbol to find alternate IDs for. Returns: List[str]: List of alternate IDs for the gene symbol. \"\"\" if gene_symbol in self . hgnc_data . keys (): return [ self . hgnc_data [ gene_symbol ][ \"hgnc_id\" ], \"ncbigene:\" + self . hgnc_data [ gene_symbol ][ \"entrez_id\" ], \"ensembl:\" + self . hgnc_data [ gene_symbol ][ \"ensembl_id\" ], \"symbol:\" + gene_symbol , ] else : for symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return [ data [ \"hgnc_id\" ], \"ncbigene:\" + data [ \"entrez_id\" ], \"ensembl:\" + data [ \"ensembl_id\" ], \"symbol:\" + symbol , ] def update_genomic_interpretations_gene_identifier ( self , interpretations : List [ Interpretation ], phenopacket_path : Path ) -> List [ Interpretation ]: \"\"\" Update the genomic interpretations of a Phenopacket. Args: interpretations (List[Interpretation]): List of Interpretation objects. Returns: List[Interpretation]: Updated list of Interpretation objects. \"\"\" updated_interpretations = copy ( list ( interpretations )) for updated_interpretation in updated_interpretations : for g in updated_interpretation . diagnosis . genomic_interpretations : updated_gene_identifier = self . find_identifier ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) info_log . info ( f \"Updating gene identifier in { phenopacket_path } from \" f \" { g . variant_interpretation . variation_descriptor . gene_context . value_id } \" f \"to { updated_gene_identifier } \" ) g . variant_interpretation . variation_descriptor . gene_context . value_id = ( updated_gene_identifier ) del g . variant_interpretation . variation_descriptor . gene_context . alternate_ids [:] g . variant_interpretation . variation_descriptor . gene_context . 
alternate_ids . extend ( self . _find_alternate_ids ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) ) return updated_interpretations","title":"GeneIdentifierUpdater"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__","text":"Initialise the GeneIdentifierUpdater. Parameters: Name Type Description Default gene_identifier str The gene identifier to update to. required hgnc_data dict A dictionary containing HGNC data (default: None). None identifier_map dict A dictionary mapping gene identifiers (default: None). None Source code in src/pheval/utils/phenopacket_utils.py 641 642 643 644 645 646 647 648 649 650 651 652 653 def __init__ ( self , gene_identifier : str , hgnc_data : dict = None , identifier_map : dict = None ): \"\"\" Initialise the GeneIdentifierUpdater. Args: gene_identifier (str): The gene identifier to update to. hgnc_data (dict): A dictionary containing HGNC data (default: None). identifier_map (dict): A dictionary mapping gene identifiers (default: None). \"\"\" self . hgnc_data = hgnc_data self . gene_identifier = gene_identifier self . identifier_map = identifier_map","title":"__init__()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier","text":"Find the specified gene identifier for a gene symbol. Parameters: Name Type Description Default gene_symbol str The gene symbol to find the identifier for. required Returns: Name Type Description str str The identified gene identifier. Source code in src/pheval/utils/phenopacket_utils.py 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 def find_identifier ( self , gene_symbol : str ) -> str : \"\"\" Find the specified gene identifier for a gene symbol. Args: gene_symbol (str): The gene symbol to find the identifier for. Returns: str: The identified gene identifier. \"\"\" if gene_symbol in self . hgnc_data . 
keys (): return self . hgnc_data [ gene_symbol ][ self . gene_identifier ] else : for _symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return data [ self . gene_identifier ]","title":"find_identifier()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_symbol_from_identifier","text":"Obtain gene symbol from a gene identifier. Parameters: Name Type Description Default query_gene_identifier str The gene identifier. required Returns: Name Type Description str str The gene symbol corresponding to the identifier. Source code in src/pheval/utils/phenopacket_utils.py 673 674 675 676 677 678 679 680 681 682 683 def obtain_gene_symbol_from_identifier ( self , query_gene_identifier : str ) -> str : \"\"\" Obtain gene symbol from a gene identifier. Args: query_gene_identifier (str): The gene identifier. Returns: str: The gene symbol corresponding to the identifier. \"\"\" return self . identifier_map [ query_gene_identifier ]","title":"obtain_gene_symbol_from_identifier()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_interpretations_gene_identifier","text":"Update the genomic interpretations of a Phenopacket. Parameters: Name Type Description Default interpretations List [ Interpretation ] List of Interpretation objects. required Returns: Type Description List [ Interpretation ] List[Interpretation]: Updated list of Interpretation objects. Source code in src/pheval/utils/phenopacket_utils.py 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 def update_genomic_interpretations_gene_identifier ( self , interpretations : List [ Interpretation ], phenopacket_path : Path ) -> List [ Interpretation ]: \"\"\" Update the genomic interpretations of a Phenopacket. 
Args: interpretations (List[Interpretation]): List of Interpretation objects. Returns: List[Interpretation]: Updated list of Interpretation objects. \"\"\" updated_interpretations = copy ( list ( interpretations )) for updated_interpretation in updated_interpretations : for g in updated_interpretation . diagnosis . genomic_interpretations : updated_gene_identifier = self . find_identifier ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) info_log . info ( f \"Updating gene identifier in { phenopacket_path } from \" f \" { g . variant_interpretation . variation_descriptor . gene_context . value_id } \" f \"to { updated_gene_identifier } \" ) g . variant_interpretation . variation_descriptor . gene_context . value_id = ( updated_gene_identifier ) del g . variant_interpretation . variation_descriptor . gene_context . alternate_ids [:] g . variant_interpretation . variation_descriptor . gene_context . alternate_ids . extend ( self . _find_alternate_ids ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) ) return updated_interpretations","title":"update_genomic_interpretations_gene_identifier()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GenomicVariant","text":"Represents a genomic variant. Parameters: Name Type Description Default chrom str The chromosome position of the variant recommended to be provided in the following format. required pos int Position of the variant following VCF convention. required ref str Reference allele following VCF convention. required alt str Alternate allele following VCF convention. required Source code in src/pheval/utils/phenopacket_utils.py 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 @dataclass class GenomicVariant : \"\"\" Represents a genomic variant. Args: chrom (str): The chromosome position of the variant recommended to be provided in the following format. 
This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. pos (int): Position of the variant following VCF convention. ref (str): Reference allele following VCF convention. alt (str): Alternate allele following VCF convention. \"\"\" chrom : str pos : int ref : str alt : str","title":"GenomicVariant"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError","text":"Bases: Exception Exception raised for incompatible genome assembly. Source code in src/pheval/utils/phenopacket_utils.py 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 class IncompatibleGenomeAssemblyError ( Exception ): \"\"\"Exception raised for incompatible genome assembly.\"\"\" def __init__ ( self , assembly , phenopacket , message = \"Incompatible Genome Assembly\" ): \"\"\" Initialise IncompatibleGenomeAssemblyError. Attributes: assembly (str): Incompatible genome assembly encountered. phenopacket (Path): Path to the Phenopacket associated with the error. message (str, optional): Custom error message (default is \"Incompatible Genome Assembly\"). \"\"\" self . assembly : str = assembly self . phenopacket : Path = phenopacket self . message : str = message super () . __init__ ( self . message ) def __str__ ( self ): return f \" { self . message } -> { self . assembly } in { self . phenopacket } \"","title":"IncompatibleGenomeAssemblyError"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__init__","text":"Initialise IncompatibleGenomeAssemblyError. Attributes: Name Type Description assembly str Incompatible genome assembly encountered. phenopacket Path Path to the Phenopacket associated with the error. message str Custom error message (default is \"Incompatible Genome Assembly\"). 
Source code in src/pheval/utils/phenopacket_utils.py 30 31 32 33 34 35 36 37 38 39 40 41 42 def __init__ ( self , assembly , phenopacket , message = \"Incompatible Genome Assembly\" ): \"\"\" Initialise IncompatibleGenomeAssemblyError. Attributes: assembly (str): Incompatible genome assembly encountered. phenopacket (Path): Path to the Phenopacket associated with the error. message (str, optional): Custom error message (default is \"Incompatible Genome Assembly\"). \"\"\" self . assembly : str = assembly self . phenopacket : Path = phenopacket self . message : str = message super () . __init__ ( self . message )","title":"__init__()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder","text":"Class for rebuilding a Phenopacket Source code in src/pheval/utils/phenopacket_utils.py 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 class PhenopacketRebuilder : \"\"\"Class for rebuilding a Phenopacket\"\"\" def __init__ ( self , phenopacket : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Attributes: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket = phenopacket def update_interpretations ( self , interpretations : [ Interpretation ] ) -> Union [ Phenopacket , Family ]: \"\"\" Add the updated interpretations to a Phenopacket or Family. Args: interpretations (List[Interpretation]): The updated interpretations to be added. Returns: Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . interpretations [:] phenopacket . proband . interpretations . 
extend ( interpretations ) else : del phenopacket . interpretations [:] phenopacket . interpretations . extend ( interpretations ) return phenopacket def add_randomised_hpo ( self , randomised_hpo : [ PhenotypicFeature ]) -> Union [ Phenopacket , Family ]: \"\"\" Add randomised phenotypic profiles to a Phenopacket or Family. Args: randomised_hpo: The randomised phenotypic profiles to be added. Returns: Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . phenotypic_features [:] phenopacket . proband . phenotypic_features . extend ( randomised_hpo ) else : del phenopacket . phenotypic_features [:] phenopacket . phenotypic_features . extend ( randomised_hpo ) return phenopacket def add_spiked_vcf_path ( self , spiked_vcf_file_data : File ) -> Union [ Phenopacket , Family ]: \"\"\" Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. \"\"\" phenopacket = copy ( self . phenopacket ) phenopacket_files = [ file for file in phenopacket . files if file . file_attributes [ \"fileFormat\" ] != \"vcf\" ] phenopacket_files . append ( spiked_vcf_file_data ) del phenopacket . files [:] phenopacket . files . 
extend ( phenopacket_files ) return phenopacket","title":"PhenopacketRebuilder"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.__init__","text":"Initialise PhenopacketUtil Attributes: Name Type Description phenopacket Union [ Phenopacket , Family ] Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 540 541 542 543 544 545 546 def __init__ ( self , phenopacket : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Attributes: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket = phenopacket","title":"__init__()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_hpo","text":"Add randomised phenotypic profiles to a Phenopacket or Family. Parameters: Name Type Description Default randomised_hpo [ PhenotypicFeature ] The randomised phenotypic profiles to be added. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. Source code in src/pheval/utils/phenopacket_utils.py 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 def add_randomised_hpo ( self , randomised_hpo : [ PhenotypicFeature ]) -> Union [ Phenopacket , Family ]: \"\"\" Add randomised phenotypic profiles to a Phenopacket or Family. Args: randomised_hpo: The randomised phenotypic profiles to be added. Returns: Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . phenotypic_features [:] phenopacket . proband . phenotypic_features . extend ( randomised_hpo ) else : del phenopacket . phenotypic_features [:] phenopacket . phenotypic_features . 
extend ( randomised_hpo ) return phenopacket","title":"add_randomised_hpo()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_spiked_vcf_path","text":"Add a spiked VCF path to a Phenopacket or Family. spiked_vcf_file_data (File): The VCF file data to be added. Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. Source code in src/pheval/utils/phenopacket_utils.py 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 def add_spiked_vcf_path ( self , spiked_vcf_file_data : File ) -> Union [ Phenopacket , Family ]: \"\"\" Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. \"\"\" phenopacket = copy ( self . phenopacket ) phenopacket_files = [ file for file in phenopacket . files if file . file_attributes [ \"fileFormat\" ] != \"vcf\" ] phenopacket_files . append ( spiked_vcf_file_data ) del phenopacket . files [:] phenopacket . files . extend ( phenopacket_files ) return phenopacket","title":"add_spiked_vcf_path()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpretations","text":"Add the updated interpretations to a Phenopacket or Family. Parameters: Name Type Description Default interpretations List [ Interpretation ] The updated interpretations to be added. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. Source code in src/pheval/utils/phenopacket_utils.py 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 def update_interpretations ( self , interpretations : [ Interpretation ] ) -> Union [ Phenopacket , Family ]: \"\"\" Add the updated interpretations to a Phenopacket or Family. 
Args: interpretations (List[Interpretation]): The updated interpretations to be added. Returns: Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . interpretations [:] phenopacket . proband . interpretations . extend ( interpretations ) else : del phenopacket . interpretations [:] phenopacket . interpretations . extend ( interpretations ) return phenopacket","title":"update_interpretations()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil","text":"Class for retrieving data from a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 class PhenopacketUtil : \"\"\"Class for retrieving data from a Phenopacket or Family object\"\"\" def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . 
append ( p ) return negated_phenotypic_features def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . diseases def _diagnosis_from_interpretations ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the interpretations object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] interpretation = self . interpretations () for i in interpretation : ( diagnoses . append ( ProbandDisease ( disease_name = i . diagnosis . disease . label , disease_identifier = i . diagnosis . disease . id , ) ) if i . diagnosis . disease . label != \"\" and i . diagnosis . disease . id != \"\" else None ) return diagnoses def _diagnosis_from_disease ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the diseases object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] for disease in self . diseases (): diagnoses . append ( ProbandDisease ( disease_name = disease . term . label , disease_identifier = disease . term . id ) ) return diagnoses def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ())) def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . 
interpretations else : return self . phenopacket_contents . interpretations def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. 
\"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . name )) return vcf_data @staticmethod def _extract_diagnosed_gene ( genomic_interpretation : GenomicInterpretation , ) -> ProbandCausativeGene : \"\"\" Retrieve the disease causing genes from the variant descriptor field if not empty, otherwise, retrieves from the gene descriptor from a phenopacket. Args: genomic_interpretation (GenomicInterpretation): A genomic interpretation from a Phenopacket Returns: ProbandCausativeGene: The disease causing gene \"\"\" if genomic_interpretation . variant_interpretation . ByteSize () != 0 : return ProbandCausativeGene ( genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . symbol , genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . value_id , ) else : return ProbandCausativeGene ( gene_symbol = genomic_interpretation . gene . symbol , gene_identifier = genomic_interpretation . gene . value_id , ) def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . 
_extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . 
gene_identifier == \"\" : return True return False def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False","title":"PhenopacketUtil"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.__init__","text":"Initialise PhenopacketUtil Parameters: Name Type Description Default phenopacket_contents Union [ Phenopacket , Family ] Phenopacket or Family object required Source code in src/pheval/utils/phenopacket_utils.py 222 223 224 225 226 227 228 def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents","title":"__init__()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.causative_variants","text":"Retrieve a list of causative variants listed in a Phenopacket Returns: Type Description List [ ProbandCausativeVariant ] List[ProbandCausativeVariant]: List of proband causative variants Source code in src/pheval/utils/phenopacket_utils.py 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . interpretations () for i in interpretation : for g in i . diagnosis . 
genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants","title":"causative_variants()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_disease_record","text":"Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: Name Type Description bool bool True if any disease record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 522 523 524 525 526 527 528 529 530 531 532 533 534 def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False","title":"check_incomplete_disease_record()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_gene_record","text":"Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. 
Returns: Name Type Description bool bool True if any gene record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False","title":"check_incomplete_gene_record()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_variant_record","text":"Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: Name Type Description bool bool True if any variant record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . 
ref == \"\" or variant . alt == \"\" ): return True return False","title":"check_incomplete_variant_record()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_genes","text":"Retrieve the disease causing genes from a phenopacket. Returns: Type Description List [ ProbandCausativeGene ] List[ProbandCausativeGene]: List of causative genes Source code in src/pheval/utils/phenopacket_utils.py 446 447 448 449 450 451 452 453 454 455 456 457 458 def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes","title":"diagnosed_genes()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_variants","text":"Retrieve a list of all known causative variants from a phenopacket. Returns: Type Description List [ GenomicVariant ] List[GenomicVariant]: List of causative variants Source code in src/pheval/utils/phenopacket_utils.py 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . 
variation_descriptor . vcf_record . pos ), ref = g . variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants","title":"diagnosed_variants()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnoses","text":"Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: Type Description List [ ProbandDisease ] List[ProbandDisease]: List of diagnosed diseases Source code in src/pheval/utils/phenopacket_utils.py 331 332 333 334 335 336 337 338 def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ()))","title":"diagnoses()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diseases","text":"Retrieve a list of Diseases associated with the proband Returns: Type Description List [ Disease ] List[Disease]: List of diseases Source code in src/pheval/utils/phenopacket_utils.py 283 284 285 286 287 288 289 290 291 292 293 def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . 
diseases","title":"diseases()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.files","text":"Retrieve a list of files associated with a phenopacket Returns: Type Description List [ File ] List[File]: List of files associated with a phenopacket Source code in src/pheval/utils/phenopacket_utils.py 380 381 382 383 384 385 386 387 def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files","title":"files()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.interpretations","text":"Retrieve a list of interpretations from a Phenopacket Returns: Type Description List [ Interpretation ] List[Interpretation]: List of interpretations Source code in src/pheval/utils/phenopacket_utils.py 340 341 342 343 344 345 346 347 348 349 350 def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . 
interpretations","title":"interpretations()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.negated_phenotypic_features","text":"Retrieve a list of all negated HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of negated HPO terms Source code in src/pheval/utils/phenopacket_utils.py 269 270 271 272 273 274 275 276 277 278 279 280 281 def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features","title":"negated_phenotypic_features()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.observed_phenotypic_features","text":"Retrieve a list of all observed HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of observed HPO terms Source code in src/pheval/utils/phenopacket_utils.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . 
append ( p ) return phenotypic_features","title":"observed_phenotypic_features()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.phenotypic_features","text":"Retrieve a list of all HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of HPO terms Source code in src/pheval/utils/phenopacket_utils.py 242 243 244 245 246 247 248 249 250 251 252 def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features","title":"phenotypic_features()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.sample_id","text":"Retrieve the sample ID from a Phenopacket or proband of a Family Returns: Name Type Description str str Sample ID Source code in src/pheval/utils/phenopacket_utils.py 230 231 232 233 234 235 236 237 238 239 240 def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id","title":"sample_id()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data","text":"Retrieve the genome assembly and VCF file name from a phenopacket. Parameters: Name Type Description Default phenopacket_path Path The path to the phenopacket file. required vcf_dir Path The directory path where the VCF file is stored. required Returns: Name Type Description File File The VCF file with updated URI pointing to the specified directory. 
Raises: Type Description IncorrectFileFormatError If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError If the genome assembly of the VCF file is not compatible. Note This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. Source code in src/pheval/utils/phenopacket_utils.py 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . uri ), \".vcf or .vcf.gz file\" ) if vcf_data . 
file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . name )) return vcf_data","title":"vcf_file_data()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.ProbandCausativeGene","text":"Represents a causative gene associated with a proband Parameters: Name Type Description Default gene_symbol str Symbol representing the gene required gene_identifier str The ENSEMBL gene identifier for the result entry required Notes While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. Source code in src/pheval/utils/phenopacket_utils.py 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 @dataclass class ProbandCausativeGene : \"\"\" Represents a causative gene associated with a proband Args: gene_symbol (str): Symbol representing the gene gene_identifier (str): The ENSEMBL gene identifier for the result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. 
\"\"\" gene_symbol : str gene_identifier : str","title":"ProbandCausativeGene"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.ProbandCausativeVariant","text":"Represents a causative variant associated with a proband Parameters: Name Type Description Default proband_id str ID of the proband required assembly str Genome assembly required variant GenomicVariant Genomic variant associated with the proband required genotype str Genotype information for the variant required info str Additional information about the variant (default is an empty string) '' Source code in src/pheval/utils/phenopacket_utils.py 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 @dataclass class ProbandCausativeVariant : \"\"\" Represents a causative variant associated with a proband Args: proband_id (str): ID of the proband assembly (str): Genome assembly variant (GenomicVariant): Genomic variant associated with the proband genotype (str): Genotype information for the variant info (str, optional): Additional information about the variant (default is an empty string) \"\"\" proband_id : str assembly : str variant : GenomicVariant genotype : str info : str = \"\"","title":"ProbandCausativeVariant"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.ProbandDisease","text":"Represents a disease associated with a proband Parameters: Name Type Description Default disease_name str Name of the disease required disease_identifier str Identifier for the disease result entry in the OMIM namespace required Notes While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/utils/phenopacket_utils.py 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 @dataclass ( frozen = True , eq = True ) class ProbandDisease : \"\"\" Represents a disease associated with a proband Args: disease_name (str): Name of the disease disease_identifier (str): Identifier for the disease result entry in the OMIM namespace Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str","title":"ProbandDisease"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.create_gene_identifier_map","text":"Create a mapping of gene identifiers to gene symbols using HGNC data. Returns: Name Type Description dict dict A mapping of gene identifiers to gene symbols. Notes The dictionary structure: { 'identifier': 'gene_symbol', ... } Source code in src/pheval/utils/phenopacket_utils.py 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 def create_gene_identifier_map () -> dict : \"\"\" Create a mapping of gene identifiers to gene symbols using HGNC data. Returns: dict: A mapping of gene identifiers to gene symbols. Notes: The dictionary structure: { 'identifier': 'gene_symbol', ... } \"\"\" hgnc_df = read_hgnc_data () identifier_map = {} for _index , row in hgnc_df . 
iterrows (): identifier_map [ row [ \"ensembl_gene_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"hgnc_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"entrez_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"refseq_accession\" ]] = row [ \"symbol\" ] return identifier_map","title":"create_gene_identifier_map()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.create_hgnc_dict","text":"Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data. Returns: Name Type Description defaultdict defaultdict A dictionary containing gene symbols as keys and their associated gene information. Notes The dictionary structure: { 'gene_symbol': { 'ensembl_id': str, 'hgnc_id': str, 'entrez_id': str, 'refseq_accession': str, 'previous_symbol': [str, ...] }, ... } Source code in src/pheval/utils/phenopacket_utils.py 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 def create_hgnc_dict () -> defaultdict : \"\"\" Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data. Returns: defaultdict: A dictionary containing gene symbols as keys and their associated gene information. Notes: The dictionary structure: { 'gene_symbol': { 'ensembl_id': str, 'hgnc_id': str, 'entrez_id': str, 'refseq_accession': str, 'previous_symbol': [str, ...] }, ... } \"\"\" hgnc_df = read_hgnc_data () hgnc_data = defaultdict ( dict ) for _index , row in hgnc_df . iterrows (): previous_names = [] hgnc_data [ row [ \"symbol\" ]][ \"ensembl_id\" ] = row [ \"ensembl_gene_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"hgnc_id\" ] = row [ \"hgnc_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"entrez_id\" ] = row [ \"entrez_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"refseq_accession\" ] = row [ \"refseq_accession\" ] previous = str ( row [ \"prev_symbol\" ]) . split ( \"|\" ) for p in previous : previous_names . 
append ( p . strip ( '\"' )) hgnc_data [ row [ \"symbol\" ]][ \"previous_symbol\" ] = previous_names return hgnc_data","title":"create_hgnc_dict()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.create_json_message","text":"Create a JSON message for writing to a file. phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON. str: A JSON-formatted string representation of the Phenopacket or Family object. Source code in src/pheval/utils/phenopacket_utils.py 608 609 610 611 612 613 614 615 616 617 618 def create_json_message ( phenopacket : Union [ Phenopacket , Family ]) -> str : \"\"\" Create a JSON message for writing to a file. Args: - phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON. Returns: - str: A JSON-formatted string representation of the Phenopacket or Family object. \"\"\" return MessageToJson ( phenopacket )","title":"create_json_message()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.phenopacket_reader","text":"Read a Phenopacket file and returns its contents as a Phenopacket or Family object Parameters: Name Type Description Default file Path Path to the Phenopacket file required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 def phenopacket_reader ( file : Path ) -> Union [ Phenopacket , Family ]: \"\"\" Read a Phenopacket file and returns its contents as a Phenopacket or Family object Args: file (Path): Path to the Phenopacket file Returns: Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object \"\"\" file = open ( file , \"r\" ) phenopacket = json . load ( file ) file . 
close () if \"proband\" in phenopacket : return Parse ( json . dumps ( phenopacket ), Family ()) else : return Parse ( json . dumps ( phenopacket ), Phenopacket ())","title":"phenopacket_reader()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.read_hgnc_data","text":"Read HGNC data from a file and return it as a Pandas DataFrame. Returns: Type Description pd . DataFrame pd.DataFrame: DataFrame containing the HGNC data. Source code in src/pheval/utils/phenopacket_utils.py 125 126 127 128 129 130 131 132 133 134 135 136 def read_hgnc_data () -> pd . DataFrame : \"\"\" Read HGNC data from a file and return it as a Pandas DataFrame. Returns: pd.DataFrame: DataFrame containing the HGNC data. \"\"\" return pd . read_csv ( os . path . dirname ( __file__ ) . replace ( \"utils\" , \"resources/hgnc_complete_set.txt\" ), delimiter = \" \\t \" , dtype = str , )","title":"read_hgnc_data()"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.write_phenopacket","text":"Write a Phenopacket or Family object to a file in JSON format. Parameters: Name Type Description Default phenopacket Phenopacket or Family The Phenopacket or Family object to be written. required output_file Path The Path object representing the file to write the Phenopacket data. required Returns: Type Description None None Source code in src/pheval/utils/phenopacket_utils.py 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 def write_phenopacket ( phenopacket : Union [ Phenopacket , Family ], output_file : Path ) -> None : \"\"\" Write a Phenopacket or Family object to a file in JSON format. Args: phenopacket (Phenopacket or Family): The Phenopacket or Family object to be written. output_file (Path): The Path object representing the file to write the Phenopacket data. Returns: None \"\"\" phenopacket_json = create_json_message ( phenopacket ) with open ( output_file , \"w\" ) as outfile : outfile . 
write ( phenopacket_json ) outfile . close ()","title":"write_phenopacket()"},{"location":"api/pheval/utils/semsim_utils/","text":"Contains all pheval utility methods diff_semsim ( semsim_left , semsim_right , score_column , absolute_diff ) Calculates score difference between two semantic similarity profiles Parameters: Name Type Description Default semsim_left pd . DataFrame first semantic similarity dataframe required semsim_right pd . DataFrame second semantic similarity dataframe required score_column str Score column that will be computed (e.g. jaccard_similarity) required absolute_diff bool Whether the difference is absolute (True) or percentage (False). required Returns: Type Description pd . DataFrame pd.DataFrame: A dataframe with terms and its scores differences Source code in src/pheval/utils/semsim_utils.py 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 def diff_semsim ( semsim_left : pd . DataFrame , semsim_right : pd . DataFrame , score_column : str , absolute_diff : bool ) -> pd . DataFrame : \"\"\"Calculates score difference between two semantic similarity profiles Args: semsim_left (pd.DataFrame): first semantic similarity dataframe semsim_right (pd.DataFrame): second semantic similarity dataframe score_column (str): Score column that will be computed (e.g. jaccard_similarity) absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False). Defaults to True. Returns: pd.DataFrame: A dataframe with terms and its scores differences \"\"\" df = pd . merge ( semsim_left , semsim_right , on = [ \"subject_id\" , \"object_id\" ], how = \"outer\" ) if absolute_diff : df [ \"diff\" ] = df [ f \" { score_column } _x\" ] - df [ f \" { score_column } _y\" ] return df [[ \"subject_id\" , \"object_id\" , \"diff\" ]] df [ \"diff\" ] = df . 
apply ( lambda row : get_percentage_diff ( row [ f \" { score_column } _x\" ], row [ f \" { score_column } _y\" ]), axis = 1 ) return df [[ \"subject_id\" , \"object_id\" , f \" { score_column } _x\" , f \" { score_column } _y\" , \"diff\" ]] filter_non_0_score ( data , col ) Removes rows that have value equal to 0 based on the given column passed by col parameter Parameters: Name Type Description Default data pd . DataFrame Dirty dataframe required col str Column to be filtered required Returns: Type Description pd . DataFrame pd.DataFrame: Filtered dataframe Source code in src/pheval/utils/semsim_utils.py 14 15 16 17 18 19 20 21 22 23 24 def filter_non_0_score ( data : pd . DataFrame , col : str ) -> pd . DataFrame : \"\"\"Removes rows that have value equal to 0 based on the given column passed by col parameter Args: data (pd.DataFrame): Dirty dataframe col (str): Column to be filtered Returns: pd.DataFrame: Filtered dataframe \"\"\" return data [ data [ col ] != 0 ] get_percentage_diff ( current_number , previous_number ) Gets the percentage difference between two numbers Parameters: Name Type Description Default current_number float second number in comparison required previous_number float first number in comparison required Returns: Name Type Description float float percentage difference between two numbers Source code in src/pheval/utils/semsim_utils.py 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 def get_percentage_diff ( current_number : float , previous_number : float ) -> float : \"\"\"Gets the percentage difference between two numbers Args: current_number (float): second number in comparison previous_number (float): first number in comparison Returns: float: percentage difference between two numbers \"\"\" try : if current_number == previous_number : return \" {:.2%} \" . 
format ( 0 ) if current_number > previous_number : number = ( 1 - (( current_number / previous_number ))) * 100 else : number = ( 100 - (( previous_number / current_number ) * 100 )) * - 1 return \" {:.2%} \" . format ( number / 100 ) except ZeroDivisionError : return None parse_semsim ( df , cols ) Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones Parameters: Name Type Description Default df pd . DataFrame semantic similarity profile dataframe required cols list list of columns that will be selected on semsim data required Returns: Type Description pd . DataFrame pd.Dataframe: parsed semantic similarity dataframe Source code in src/pheval/utils/semsim_utils.py 27 28 29 30 31 32 33 34 35 36 37 38 39 def parse_semsim ( df : pd . DataFrame , cols : list ) -> pd . DataFrame : \"\"\"Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones Args: df (pd.DataFrame): semantic similarity profile dataframe cols (list): list of columns that will be selected on semsim data Returns: pd.Dataframe: parsed semantic similarity dataframe \"\"\" df [ cols [ - 1 ]] = pd . to_numeric ( df [ cols [ - 1 ]], errors = \"coerce\" ) df . replace ( \"None\" , numpy . nan ) . dropna ( subset = cols [ - 1 ], inplace = True ) return df percentage_diff ( semsim_left , semsim_right , score_column , output ) Compares two semantic similarity profiles Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. 
jaccard_similarity) required output Path Output path for the difference tsv file required Source code in src/pheval/utils/semsim_utils.py 67 68 69 70 71 72 73 74 75 76 77 def percentage_diff ( semsim_left : Path , semsim_right : Path , score_column : str , output : Path ): \"\"\"Compares two semantic similarity profiles Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. jaccard_similarity) output (Path): Output path for the difference tsv file \"\"\" clean_df = semsim_analysis ( semsim_left , semsim_right , score_column , absolute_diff = False ) clean_df . sort_values ( by = \"diff\" , ascending = False ) . to_csv ( output , sep = \" \\t \" , index = False ) semsim_analysis ( semsim_left , semsim_right , score_column , absolute_diff = True ) semsim_analysis Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. jaccard_similarity) required absolute_diff bool Whether the difference is absolute (True) or percentage (False). True Returns: Type Description pd . DataFrame [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles Source code in src/pheval/utils/semsim_utils.py 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 def semsim_analysis ( semsim_left : Path , semsim_right : Path , score_column : str , absolute_diff = True ) -> pd . DataFrame : \"\"\"semsim_analysis Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. 
jaccard_similarity) absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False). Defaults to True. Returns: [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles \"\"\" validate_semsim_file_comparison ( semsim_left , semsim_right ) cols = [ \"subject_id\" , \"object_id\" , score_column ] semsim_left = pd . read_csv ( semsim_left , sep = \" \\t \" ) semsim_right = pd . read_csv ( semsim_right , sep = \" \\t \" ) file_utils . ensure_columns_exists ( cols = cols , err_message = \"must exist in semsim dataframes\" , dataframes = [ semsim_left , semsim_right ], ) semsim_left = parse_semsim ( semsim_left , cols ) semsim_right = parse_semsim ( semsim_right , cols ) diff_df = diff_semsim ( semsim_left , semsim_right , score_column , absolute_diff ) return filter_non_0_score ( diff_df , \"diff\" ) semsim_heatmap_plot ( semsim_left , semsim_right , score_column ) Plots semantic similarity profiles heatmap Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. jaccard_similarity) required Source code in src/pheval/utils/semsim_utils.py 80 81 82 83 84 85 86 87 88 89 90 91 def semsim_heatmap_plot ( semsim_left : Path , semsim_right : Path , score_column : str ): \"\"\"Plots semantic similarity profiles heatmap Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. jaccard_similarity) \"\"\" clean_df = semsim_analysis ( semsim_left , semsim_right , score_column ) df = clean_df . pivot ( index = \"subject_id\" , columns = \"object_id\" , values = \"diff\" ) fig = px . imshow ( df , text_auto = True ) fig . 
show () validate_semsim_file_comparison ( semsim_left , semsim_right ) Checks if files exist and whether they're different Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required Raises: Type Description Exception FileNotFoundException Source code in src/pheval/utils/semsim_utils.py 124 125 126 127 128 129 130 131 132 133 134 135 def validate_semsim_file_comparison ( semsim_left : Path , semsim_right : Path ): \"\"\"Checks if files exist and whether they're different Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile Raises: Exception: FileNotFoundException \"\"\" if semsim_left == semsim_right : errmsg = \"Semantic similarity profiles are equal. Make sure you have selected different files to analyze\" raise Exception ( errmsg ) file_utils . ensure_file_exists ( semsim_left , semsim_right )","title":"Semsim utils"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.diff_semsim","text":"Calculates score difference between two semantic similarity profiles Parameters: Name Type Description Default semsim_left pd . DataFrame first semantic similarity dataframe required semsim_right pd . DataFrame second semantic similarity dataframe required score_column str Score column that will be computed (e.g. jaccard_similarity) required absolute_diff bool Whether the difference is absolute (True) or percentage (False). required Returns: Type Description pd . DataFrame pd.DataFrame: A dataframe with terms and its scores differences Source code in src/pheval/utils/semsim_utils.py 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 def diff_semsim ( semsim_left : pd . DataFrame , semsim_right : pd . DataFrame , score_column : str , absolute_diff : bool ) -> pd . 
DataFrame : \"\"\"Calculates score difference between two semantic similarity profiles Args: semsim_left (pd.DataFrame): first semantic similarity dataframe semsim_right (pd.DataFrame): second semantic similarity dataframe score_column (str): Score column that will be computed (e.g. jaccard_similarity) absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False). Defaults to True. Returns: pd.DataFrame: A dataframe with terms and its scores differences \"\"\" df = pd . merge ( semsim_left , semsim_right , on = [ \"subject_id\" , \"object_id\" ], how = \"outer\" ) if absolute_diff : df [ \"diff\" ] = df [ f \" { score_column } _x\" ] - df [ f \" { score_column } _y\" ] return df [[ \"subject_id\" , \"object_id\" , \"diff\" ]] df [ \"diff\" ] = df . apply ( lambda row : get_percentage_diff ( row [ f \" { score_column } _x\" ], row [ f \" { score_column } _y\" ]), axis = 1 ) return df [[ \"subject_id\" , \"object_id\" , f \" { score_column } _x\" , f \" { score_column } _y\" , \"diff\" ]]","title":"diff_semsim()"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.filter_non_0_score","text":"Removes rows that have value equal to 0 based on the given column passed by col parameter Parameters: Name Type Description Default data pd . DataFrame Dirty dataframe required col str Column to be filtered required Returns: Type Description pd . DataFrame pd.DataFrame: Filtered dataframe Source code in src/pheval/utils/semsim_utils.py 14 15 16 17 18 19 20 21 22 23 24 def filter_non_0_score ( data : pd . DataFrame , col : str ) -> pd . 
DataFrame : \"\"\"Removes rows that have value equal to 0 based on the given column passed by col parameter Args: data (pd.DataFrame): Dirty dataframe col (str): Column to be filtered Returns: pd.DataFrame: Filtered dataframe \"\"\" return data [ data [ col ] != 0 ]","title":"filter_non_0_score()"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.get_percentage_diff","text":"Gets the percentage difference between two numbers Parameters: Name Type Description Default current_number float second number in comparison required previous_number float first number in comparison required Returns: Name Type Description float float percentage difference between two numbers Source code in src/pheval/utils/semsim_utils.py 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 def get_percentage_diff ( current_number : float , previous_number : float ) -> float : \"\"\"Gets the percentage difference between two numbers Args: current_number (float): second number in comparison previous_number (float): first number in comparison Returns: float: percentage difference between two numbers \"\"\" try : if current_number == previous_number : return \" {:.2%} \" . format ( 0 ) if current_number > previous_number : number = ( 1 - (( current_number / previous_number ))) * 100 else : number = ( 100 - (( previous_number / current_number ) * 100 )) * - 1 return \" {:.2%} \" . format ( number / 100 ) except ZeroDivisionError : return None","title":"get_percentage_diff()"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.parse_semsim","text":"Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones Parameters: Name Type Description Default df pd . DataFrame semantic similarity profile dataframe required cols list list of columns that will be selected on semsim data required Returns: Type Description pd . 
DataFrame pd.Dataframe: parsed semantic similarity dataframe Source code in src/pheval/utils/semsim_utils.py 27 28 29 30 31 32 33 34 35 36 37 38 39 def parse_semsim ( df : pd . DataFrame , cols : list ) -> pd . DataFrame : \"\"\"Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones Args: df (pd.DataFrame): semantic similarity profile dataframe cols (list): list of columns that will be selected on semsim data Returns: pd.Dataframe: parsed semantic similarity dataframe \"\"\" df [ cols [ - 1 ]] = pd . to_numeric ( df [ cols [ - 1 ]], errors = \"coerce\" ) df . replace ( \"None\" , numpy . nan ) . dropna ( subset = cols [ - 1 ], inplace = True ) return df","title":"parse_semsim()"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.percentage_diff","text":"Compares two semantic similarity profiles Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. jaccard_similarity) required output Path Output path for the difference tsv file required Source code in src/pheval/utils/semsim_utils.py 67 68 69 70 71 72 73 74 75 76 77 def percentage_diff ( semsim_left : Path , semsim_right : Path , score_column : str , output : Path ): \"\"\"Compares two semantic similarity profiles Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. jaccard_similarity) output (Path): Output path for the difference tsv file \"\"\" clean_df = semsim_analysis ( semsim_left , semsim_right , score_column , absolute_diff = False ) clean_df . sort_values ( by = \"diff\" , ascending = False ) . 
to_csv ( output , sep = \" \\t \" , index = False )","title":"percentage_diff()"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.semsim_analysis","text":"semsim_analysis Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. jaccard_similarity) required absolute_diff bool Whether the difference is absolute (True) or percentage (False). True Returns: Type Description pd . DataFrame [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles Source code in src/pheval/utils/semsim_utils.py 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 def semsim_analysis ( semsim_left : Path , semsim_right : Path , score_column : str , absolute_diff = True ) -> pd . DataFrame : \"\"\"semsim_analysis Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. jaccard_similarity) absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False). Defaults to True. Returns: [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles \"\"\" validate_semsim_file_comparison ( semsim_left , semsim_right ) cols = [ \"subject_id\" , \"object_id\" , score_column ] semsim_left = pd . read_csv ( semsim_left , sep = \" \\t \" ) semsim_right = pd . read_csv ( semsim_right , sep = \" \\t \" ) file_utils . 
ensure_columns_exists ( cols = cols , err_message = \"must exist in semsim dataframes\" , dataframes = [ semsim_left , semsim_right ], ) semsim_left = parse_semsim ( semsim_left , cols ) semsim_right = parse_semsim ( semsim_right , cols ) diff_df = diff_semsim ( semsim_left , semsim_right , score_column , absolute_diff ) return filter_non_0_score ( diff_df , \"diff\" )","title":"semsim_analysis()"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.semsim_heatmap_plot","text":"Plots semantic similarity profiles heatmap Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. jaccard_similarity) required Source code in src/pheval/utils/semsim_utils.py 80 81 82 83 84 85 86 87 88 89 90 91 def semsim_heatmap_plot ( semsim_left : Path , semsim_right : Path , score_column : str ): \"\"\"Plots semantic similarity profiles heatmap Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. jaccard_similarity) \"\"\" clean_df = semsim_analysis ( semsim_left , semsim_right , score_column ) df = clean_df . pivot ( index = \"subject_id\" , columns = \"object_id\" , values = \"diff\" ) fig = px . imshow ( df , text_auto = True ) fig . 
show ()","title":"semsim_heatmap_plot()"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.validate_semsim_file_comparison","text":"Checks if files exist and whether they're different Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required Raises: Type Description Exception FileNotFoundException Source code in src/pheval/utils/semsim_utils.py 124 125 126 127 128 129 130 131 132 133 134 135 def validate_semsim_file_comparison ( semsim_left : Path , semsim_right : Path ): \"\"\"Checks if files exist and whether they're different Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile Raises: Exception: FileNotFoundException \"\"\" if semsim_left == semsim_right : errmsg = \"Semantic similarity profiles are equal. Make sure you have selected different files to analyze\" raise Exception ( errmsg ) file_utils . ensure_file_exists ( semsim_left , semsim_right )","title":"validate_semsim_file_comparison()"},{"location":"api/pheval/utils/utils/","text":"Contains all pheval utility methods rand ( df , min_num , max_num , scramble_factor ) Numeric scrambling Parameters: Name Type Description Default df pd . DataFrame dataframe records required min_num int min value from this records required max_num int max value from this records required scramble_factor float scramble factor scalar required Returns: Name Type Description float float randomized number Source code in src/pheval/utils/utils.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 def rand ( df : pd . 
DataFrame , min_num : int , max_num : int , scramble_factor : float ) -> float : \"\"\" Numeric scrambling Args: df (pd.DataFrame): dataframe records min_num (int): min value from this records max_num (int): max value from this records scramble_factor (float): scramble factor scalar Returns: float: randomized number \"\"\" try : return df + ( random . uniform ( min_num , max_num ) * scramble_factor ) except TypeError as err : info_log . error ( df , exc_info = err ) return df semsim_scramble ( input , output , columns_to_be_scrambled , scramble_factor = 0.5 ) Scrambles semantic similarity profile with a magnitude between 0 and 1 (scramble_factor: 0 means no scrambling and 1 means complete randomisation). It then randomises the above scores with a degree of the scramble_factor and returns a scrambles pandas dataframe. Args: input (Path): scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): columns that will be scrambled in semsim file (e.g. jaccard_similarity). output (Path) Returns: pd.Dataframe: scrambled dataframe Source code in src/pheval/utils/utils.py 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 def semsim_scramble ( input : Path , output : Path , columns_to_be_scrambled : List [ str ], scramble_factor : float = 0.5 , ) -> pd . DataFrame : \"\"\" Scrambles semantic similarity profile with a magnitude between 0 and 1 (scramble_factor: 0 means no scrambling and 1 means complete randomisation). It then randomises the above scores with a degree of the scramble_factor and returns a scrambles pandas dataframe. Args: input (Path): scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): columns that will be scrambled in semsim file (e.g. jaccard_similarity). output (Path) Returns: pd.Dataframe: scrambled dataframe \"\"\" semsim = pd . read_csv ( input , sep = \" \\t \" ) dataframe = semsim_scramble_df ( semsim , columns_to_be_scrambled , scramble_factor ) dataframe . 
to_csv ( output , sep = \" \\t \" , index = False ) semsim_scramble_df ( dataframe , columns_to_be_scrambled , scramble_factor ) scramble_semsim_df Parameters: Name Type Description Default dataframe pd . DataFrame dataframe that contains semsim profile required columns_to_be_scrambled List [ str ] required Returns: Type Description pd . DataFrame pd.Dataframe: scrambled dataframe Source code in src/pheval/utils/utils.py 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 def semsim_scramble_df ( dataframe : pd . DataFrame , columns_to_be_scrambled : List [ str ], scramble_factor : float , ) -> pd . DataFrame : \"\"\"scramble_semsim_df Args: dataframe (pd.DataFrame): dataframe that contains semsim profile scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): Returns: pd.Dataframe: scrambled dataframe \"\"\" for col in columns_to_be_scrambled : min_num = dataframe [ col ] . min () max_num = dataframe [ col ] . max () dataframe [ col ] = dataframe [ col ] . apply ( rand , args = ( min_num , max_num , scramble_factor )) return dataframe","title":"Utils"},{"location":"api/pheval/utils/utils/#src.pheval.utils.utils.rand","text":"Numeric scrambling Parameters: Name Type Description Default df pd . DataFrame dataframe records required min_num int min value from this records required max_num int max value from this records required scramble_factor float scramble factor scalar required Returns: Name Type Description float float randomized number Source code in src/pheval/utils/utils.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 def rand ( df : pd . DataFrame , min_num : int , max_num : int , scramble_factor : float ) -> float : \"\"\" Numeric scrambling Args: df (pd.DataFrame): dataframe records min_num (int): min value from this records max_num (int): max value from this records scramble_factor (float): scramble factor scalar Returns: float: randomized number \"\"\" try : return df + ( random . 
uniform ( min_num , max_num ) * scramble_factor ) except TypeError as err : info_log . error ( df , exc_info = err ) return df","title":"rand()"},{"location":"api/pheval/utils/utils/#src.pheval.utils.utils.semsim_scramble","text":"Scrambles semantic similarity profile with a magnitude between 0 and 1 (scramble_factor: 0 means no scrambling and 1 means complete randomisation). It then randomises the above scores with a degree of the scramble_factor and returns a scrambles pandas dataframe. Args: input (Path): scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): columns that will be scrambled in semsim file (e.g. jaccard_similarity). output (Path) Returns: pd.Dataframe: scrambled dataframe Source code in src/pheval/utils/utils.py 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 def semsim_scramble ( input : Path , output : Path , columns_to_be_scrambled : List [ str ], scramble_factor : float = 0.5 , ) -> pd . DataFrame : \"\"\" Scrambles semantic similarity profile with a magnitude between 0 and 1 (scramble_factor: 0 means no scrambling and 1 means complete randomisation). It then randomises the above scores with a degree of the scramble_factor and returns a scrambles pandas dataframe. Args: input (Path): scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): columns that will be scrambled in semsim file (e.g. jaccard_similarity). output (Path) Returns: pd.Dataframe: scrambled dataframe \"\"\" semsim = pd . read_csv ( input , sep = \" \\t \" ) dataframe = semsim_scramble_df ( semsim , columns_to_be_scrambled , scramble_factor ) dataframe . to_csv ( output , sep = \" \\t \" , index = False )","title":"semsim_scramble()"},{"location":"api/pheval/utils/utils/#src.pheval.utils.utils.semsim_scramble_df","text":"scramble_semsim_df Parameters: Name Type Description Default dataframe pd . 
DataFrame dataframe that contains semsim profile required columns_to_be_scrambled List [ str ] required Returns: Type Description pd . DataFrame pd.Dataframe: scrambled dataframe Source code in src/pheval/utils/utils.py 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 def semsim_scramble_df ( dataframe : pd . DataFrame , columns_to_be_scrambled : List [ str ], scramble_factor : float , ) -> pd . DataFrame : \"\"\"scramble_semsim_df Args: dataframe (pd.DataFrame): dataframe that contains semsim profile scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): Returns: pd.Dataframe: scrambled dataframe \"\"\" for col in columns_to_be_scrambled : min_num = dataframe [ col ] . min () max_num = dataframe [ col ] . max () dataframe [ col ] = dataframe [ col ] . apply ( rand , args = ( min_num , max_num , scramble_factor )) return dataframe","title":"semsim_scramble_df()"}]}
\ No newline at end of file
+{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Home Introduction PhEval - Phenotypic Inference Evaluation Framework PhEval: Tool-specific processing (VP pipeline) flowchart LR PC-->DP PC[(Phenopackets Corpus)] SSSOM[Semantic Similarity Profiles Mapping Commons]-->|OAK-SEMSIM|DP[Data Prepare] KG[Source data KG - Monarch KG]-->|KGX-BIOLINK|DP[Data Prepare] ONT[Ontologies - Phenio]-->|OAK-ONTO|DP[Data Prepare] DP-->RP[Run Prepare] RP-->PR[PhEval Runner] PR-->DP2[Data Process] ER[Exomiser Runner]-->PR EDP[Exomiser Data Prepare]-->DP ERP[Exomiser Run Prepare]-->RP PPP[Disease-profile similarity prediction Post-process]-->DP2 PV[Phenotype/Variant]-->DP2 GVP[Gene VP Post-process]-->DP2 EPP[Exomiser Post Process]-->GVP GVP-->VPR[VP Report] Quick links: GitHub page","title":"Home"},{"location":"#home","text":"","title":"Home"},{"location":"#introduction","text":"PhEval - Phenotypic Inference Evaluation Framework","title":"Introduction"},{"location":"#pheval-tool-specific-processing-vp-pipeline","text":"flowchart LR PC-->DP PC[(Phenopackets Corpus)] SSSOM[Semantic Similarity Profiles Mapping Commons]-->|OAK-SEMSIM|DP[Data Prepare] KG[Source data KG - Monarch KG]-->|KGX-BIOLINK|DP[Data Prepare] ONT[Ontologies - Phenio]-->|OAK-ONTO|DP[Data Prepare] DP-->RP[Run Prepare] RP-->PR[PhEval Runner] PR-->DP2[Data Process] ER[Exomiser Runner]-->PR EDP[Exomiser Data Prepare]-->DP ERP[Exomiser Run Prepare]-->RP PPP[Disease-profile similarity prediction Post-process]-->DP2 PV[Phenotype/Variant]-->DP2 GVP[Gene VP Post-process]-->DP2 EPP[Exomiser Post Process]-->GVP GVP-->VPR[VP Report] Quick links: GitHub page","title":"PhEval: Tool-specific processing (VP pipeline)"},{"location":"CODE_OF_CONDUCT/","text":"Contributor Covenant Code of Conduct Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project 
and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. Our Standards Examples of behavior that contributes to creating a positive environment include: Using welcoming and inclusive language Being respectful of differing viewpoints and experiences Gracefully accepting constructive criticism Focusing on what is best for the community Showing empathy towards other community members Examples of unacceptable behavior by participants include: The use of sexualized language or imagery and unwelcome sexual attention or advances Trolling, insulting/derogatory comments, and personal or political attacks Public or private harassment Publishing others' private information, such as a physical or electronic address, without explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
Representation of a project may be further defined and clarified by project maintainers. Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. Attribution This code of conduct has been derived from the excellent code of conduct of the ATOM project which in turn is adapted from the Contributor Covenant , version 1.4, available at https://contributor-covenant.org/version/1/4","title":"Contributor Covenant Code of Conduct"},{"location":"CODE_OF_CONDUCT/#contributor-covenant-code-of-conduct","text":"","title":"Contributor Covenant Code of Conduct"},{"location":"CODE_OF_CONDUCT/#our-pledge","text":"In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to make participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.","title":"Our Pledge"},{"location":"CODE_OF_CONDUCT/#our-standards","text":"Examples of behavior that contributes to creating a positive environment include: Using welcoming and inclusive language Being respectful of differing viewpoints and experiences Gracefully accepting constructive criticism Focusing on what is best for the community Showing empathy towards other community members Examples of unacceptable behavior by participants include: The use of 
sexualized language or imagery and unwelcome sexual attention or advances Trolling, insulting/derogatory comments, and personal or political attacks Public or private harassment Publishing others' private information, such as a physical or electronic address, without explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting","title":"Our Standards"},{"location":"CODE_OF_CONDUCT/#our-responsibilities","text":"Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.","title":"Our Responsibilities"},{"location":"CODE_OF_CONDUCT/#scope","text":"This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.","title":"Scope"},{"location":"CODE_OF_CONDUCT/#enforcement","text":"Instances of abusive, harassing, or otherwise unacceptable behavior. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.","title":"Enforcement"},{"location":"CODE_OF_CONDUCT/#attribution","text":"This code of conduct has been derived from the excellent code of conduct of the ATOM project which in turn is adapted from the Contributor Covenant , version 1.4, available at https://contributor-covenant.org/version/1/4","title":"Attribution"},{"location":"about/","text":"PhEval - Phenotypic Inference Evaluation Framework Many variant prioritization tools (such as Exomiser and other computational approaches) rely on ontologies and phenotype matching, sometimes involving complex processes such as cross-species inference. The performance of such tools is exceedingly hard to evaluate because of the many factors involved: changes to the structure of the ontology, cross-species mappings, and semantic similarity algorithms can have significant consequences. Furthermore, the lack of suitable real-world problems/corpora leads to the situation that many algorithms are evaluated using simulations, which may fail to capture real-world scenarios. The lack of an evaluation framework that enables studying effects on data and knowledge inputs on real-world problems makes it difficult to optimize algorithms. To this end, we are developing a modular Phenotypic Inference Evaluation Framework (PhEval), which is delivered as a community resource.","title":"About"},{"location":"about/#pheval-phenotypic-inference-evaluation-framework","text":"Many variant prioritization tools (such as Exomiser and other computational approaches) rely on ontologies and phenotype matching, sometimes involving complex processes such as cross-species inference. 
The performance of such tools is exceedingly hard to evaluate because of the many factors involved: changes to the structure of the ontology, cross-species mappings, and semantic similarity algorithms can have significant consequences. Furthermore, the lack of suitable real-world problems/corpora leads to the situation that many algorithms are evaluated using simulations, which may fail to capture real-world scenarios. The lack of an evaluation framework that enables studying effects on data and knowledge inputs on real-world problems makes it difficult to optimize algorithms. To this end, we are developing a modular Phenotypic Inference Evaluation Framework (PhEval), which is delivered as a community resource.","title":"PhEval - Phenotypic Inference Evaluation Framework"},{"location":"contact/","text":"Contact The preferred way to contact the PhEval team is through the issue tracker (for problems with PhEval) or the GitHub discussions (for general questions). You can find any of the members of the PhEval core team on GitHub: https://github.com/orgs/monarch-initiative/teams/pheval-team Their GitHub profiles usually also provide email addresses.","title":"Contact Us"},{"location":"contact/#contact","text":"The preferred way to contact the PhEval team is through the issue tracker (for problems with PhEval) or the GitHub discussions (for general questions). You can find any of the members of the PhEval core team on GitHub: https://github.com/orgs/monarch-initiative/teams/pheval-team Their GitHub profiles usually also provide email addresses.","title":"Contact"},{"location":"contributing/","text":"Contributions First of all: Thank you for taking the time to contribute! The following is a set of guidelines for contributing to the PhEval framework. These guidelines are not strict rules. Use your best judgment, and feel free to propose changes to this document in a pull request. 
Table Of Contents Contributions Table Of Contents Code of Conduct Guidelines for Contributions and Requests Reporting problems with the data model Code of Conduct The monarch-technical-documentation team strives to create a welcoming environment for editors, users and other contributors. Please carefully read our Code of Conduct . Guidelines for Contributions and Requests Reporting problems with the data model Please use our Issue Tracker for reporting problems with the ontology.","title":"Contributions"},{"location":"contributing/#contributions","text":"First of all: Thank you for taking the time to contribute! The following is a set of guidelines for contributing to the PhEval framework. These guidelines are not strict rules. Use your best judgment, and feel free to propose changes to this document in a pull request.","title":"Contributions"},{"location":"contributing/#table-of-contents","text":"Contributions Table Of Contents Code of Conduct Guidelines for Contributions and Requests Reporting problems with the data model","title":"Table Of Contents"},{"location":"contributing/#code-of-conduct","text":"The monarch-technical-documentation team strives to create a welcoming environment for editors, users and other contributors. Please carefully read our Code of Conduct .","title":"Code of Conduct"},{"location":"contributing/#guidelines-for-contributions-and-requests","text":"","title":"Guidelines for Contributions and Requests"},{"location":"contributing/#reporting-problems-with-the-data-model","text":"Please use our Issue Tracker for reporting problems with the ontology.","title":"Reporting problems with the data model"},{"location":"developing_a_pheval_plugin/","text":"Developing a PhEval Plugin Description Plugin development allows PhEval to be extensible, as we have designed it. The plugin goal is to be flexible through custom runner implementations. This plugin development enhances the PhEval functionality. 
You can build one quickly using this step-by-step process. All custom Runners implementations must implement all PhevalRunner methods Bases: ABC PhEvalRunner Class Source code in src/pheval/runners/runner.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 @dataclass class PhEvalRunner ( ABC ): \"\"\"PhEvalRunner Class\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str directory_path = None input_dir_config = None _meta_data = None __raw_results_dir = \"raw_results/\" __pheval_gene_results_dir = \"pheval_gene_results/\" __pheval_variant_results_dir = \"pheval_variant_results/\" __pheval_disease_results_dir = \"pheval_disease_results/\" __tool_input_commands_dir = \"tool_input_commands/\" __run_meta_data_file = \"results.yml\" def __post_init__ ( self ): self . input_dir_config = parse_input_dir_config ( self . input_dir ) def _get_tool ( self ): return self . input_dir_config . tool def _get_variant_analysis ( self ): return self . input_dir_config . variant_analysis def _get_gene_analysis ( self ): return self . input_dir_config . gene_analysis def _get_disease_analysis ( self ): return self . input_dir_config . disease_analysis @property def tool_input_commands_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __tool_input_commands_dir ) @tool_input_commands_dir . setter def tool_input_commands_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def raw_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __raw_results_dir ) @raw_results_dir . setter def raw_results_dir ( self , directory_path ): self . 
directory_path = Path ( directory_path ) @property def pheval_gene_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_gene_results_dir ) @pheval_gene_results_dir . setter def pheval_gene_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_variant_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_variant_results_dir ) @pheval_variant_results_dir . setter def pheval_variant_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_disease_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_disease_results_dir ) @pheval_disease_results_dir . setter def pheval_disease_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True ) @property def meta_data ( self ): self . _meta_data = BasicOutputRunMetaData ( tool = self . input_dir_config . tool , tool_version = self . version , config = f \" { Path ( self . input_dir ) . parent . name } / { Path ( self . input_dir ) . name } \" , run_timestamp = datetime . now () . timestamp (), corpus = f \" { Path ( self . testdata_dir ) . parent . name } / { Path ( self . testdata_dir ) . name } \" , ) return self . _meta_data @meta_data . setter def meta_data ( self , meta_data ): self . 
_meta_data = meta_data @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" @abstractmethod def run ( self ): \"\"\"run\"\"\" @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data Step-by-Step Plugin Development Process The plugin structure is derived from a cookiecutter template, Sphintoxetry-cookiecutter , and it uses Sphinx , tox and poetry as core dependencies. This allows PhEval extensibility to be standardized in terms of documentation and dependency management. 1. Sphintoxetry-cookiecutter scaffold First, install the cruft package. Cruft enables keeping projects up-to-date with future updates made to this original template. Install the latest release of cruft from pip pip install cruft NOTE: You may encounter an error with the naming of the project layout if using an older release of cruft. To avoid this, make sure you have installed the latest release version. Next, create a project using the sphintoxetry-cookiecutter template. cruft create https://github.com/monarch-initiative/monarch-project-template 2. Further setup Install poetry if you haven't already. pip install poetry Install dependencies poetry install Add PhEval dependency poetry add pheval Run tox to see if the setup works poetry run tox 3. 
Implement PhEval Custom Runner The runner name is arbitrary and custom Runner name was chose by demonstrative purposes Create a runner file inside the plugin project, e.g: \"\"\"Custom Pheval Runner.\"\"\" from dataclasses import dataclass from pathlib import Path from pheval.runners.runner import PhEvalRunner @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" ) 4. Add PhEval Plugins section to the pyproject.toml file [tool.poetry.plugins. \"pheval.plugins\" ] customrunner = \"pheval_plugin_example.runner:CustomPhevalRunner\" Replace the value above with the path to your custom runner plugin 5. Implementing PhEval helper methods Streamlining the creation of your custom PhEval runner can be facilitated by leveraging PhEval's versatile helper methods, where applicable. Within PhEval, numerous public methods have been designed to assist in your runner methods. The utilisation of these helper methods is optional, yet they are crafted to enhance the overall implementation process. Utility methods The PhenopacketUtil class is designed to aid in the collection of specific data from a Phenopacket. 
Class for retrieving data from a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 class PhenopacketUtil : \"\"\"Class for retrieving data from a Phenopacket or Family object\"\"\" def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . 
id else : return self . phenopacket_contents . subject . id def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . diseases def _diagnosis_from_interpretations ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the interpretations object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] interpretation = self . interpretations () for i in interpretation : ( diagnoses . append ( ProbandDisease ( disease_name = i . diagnosis . disease . label , disease_identifier = i . diagnosis . disease . 
id , ) ) if i . diagnosis . disease . label != \"\" and i . diagnosis . disease . id != \"\" else None ) return diagnoses def _diagnosis_from_disease ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the diseases object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] for disease in self . diseases (): diagnoses . append ( ProbandDisease ( disease_name = disease . term . label , disease_identifier = disease . term . id ) ) return diagnoses def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ())) def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . interpretations def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . 
append ( variant_data ) return all_variants def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . 
name )) return vcf_data @staticmethod def _extract_diagnosed_gene ( genomic_interpretation : GenomicInterpretation , ) -> ProbandCausativeGene : \"\"\" Retrieve the disease causing genes from the variant descriptor field if not empty, otherwise, retrieves from the gene descriptor from a phenopacket. Args: genomic_interpretation (GenomicInterpretation): A genomic interpretation from a Phenopacket Returns: ProbandCausativeGene: The disease causing gene \"\"\" if genomic_interpretation . variant_interpretation . ByteSize () != 0 : return ProbandCausativeGene ( genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . symbol , genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . value_id , ) else : return ProbandCausativeGene ( gene_symbol = genomic_interpretation . gene . symbol , gene_identifier = genomic_interpretation . gene . value_id , ) def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . 
variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False PhenopacketUtil proves particularly beneficial in scenarios where the tool for which you're crafting a runner implementation does not directly accept Phenopackets as inputs. 
Instead, it might require elements\u2014such as HPO IDs\u2014 via the command-line interface (CLI). In this context, leveraging PhenopacketUtil within the runner's preparation phase enables the extraction of observed phenotypic features from the Phenopacket input, facilitating seamless processing. An example of how this could be implemented is outlined here: from pheval.utils.phenopacket_utils import phenopacket_reader from pheval.utils.phenopacket_utils import PhenopacketUtil phenopacket = phenopacket_reader ( \"/path/to/phenopacket.json\" ) phenopacket_util = PhenopacketUtil ( phenopacket ) # To return a list of all observed phenotypes for a phenopacket observed_phenotypes = phenopacket_util . observed_phenotypic_features () # To extract just the HPO ID as a list observed_phenotypes_hpo_ids = [ observed_phenotype . id for observed_phenotype in observed_phenotypes ] Additional tool-specific configurations For the pheval run command to execute successfully, a config.yaml should be found within the input directory supplied on the CLI. tool : tool_version : variant_analysis : gene_analysis : disease_analysis : tool_specific_configuration_options : The tool_specific_configuration_options is an optional field that can be populated with any variables specific to your runner implementation that is required for the running of your tool. All other fields are required to be filled in. The variant_analysis , gene_analysis , and disease_analysis are set as booleans and are for specifying what type of analysis/prioritisation the tool outputs. To populate the tool_specific_configurations_options with customised data, we suggest using the pydantic package as it can easily parse the data from the yaml structure. 
e.g., Define a BaseModel class with the fields that will populate the tool_specific_configuration_options from pydantic import BaseModel , Field class CustomisedConfigurations ( BaseModel ): \"\"\" Class for defining the customised configurations in tool_specific_configurations field, within the input_dir config.yaml Args: environment (str): Environment to run \"\"\" environment : str = Field ( ... ) Within your runner parse the field into an object. from dataclasses import dataclass from pheval.runners.runner import PhEvalRunner from pathlib import Path @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) config = CustomisedConfigurations . parse_obj ( self . input_dir_config . tool_specific_configuration_options ) environment = config . environment def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" ) Post-processing methods PhEval currently supports the benchmarking of gene, variant, and disease prioritisation results. To benchmark these result types, PhEval TSV result files need to be generated. PhEval can deal with the ranking and generation of these files to the correct location. However, the runner implementation must handle the extraction of essential data from the tool-specific raw results. This involves transforming them into a list comprising PhEval data classes, with each instance representing a result entry. 
The dataclasses representing essential information extracted from tool-specific output for gene, variant, and disease prioritisation are defined as follows: Bases: PhEvalResult Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 @dataclass class PhEvalGeneResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" gene_symbol : Union [ List [ str ], str ] gene_identifier : Union [ List [ str ], str ] score : float Bases: PhEvalResult Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. 
start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 @dataclass class PhEvalVariantResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" chromosome : str start : int end : int ref : str alt : str score : float Bases: PhEvalResult Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/post_processing/post_processing.py 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 @dataclass class PhEvalDiseaseResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str score : float The generate_pheval_result() can be implemented in your runner to write out the PhEval TSV results. An example of how the method can be called is outlined here: from pheval.post_processing.post_processing import generate_pheval_result generate_pheval_result ( pheval_result = pheval_gene_result , # this is the list of extracted PhEval result requirements sort_order_str = \"descending\" , # or can be ascending - this determines in which order the scores will be ranked output_dir = output_directory , # this can be accessed from the runner instance e.g., self.output_dir tool_result_path = tool_result_json # this is the path to the tool-specific raw results file ) Adding metadata to the results.yml By default, PhEval will write a results.yml to the output directory supplied on the CLI. The results.yml contains basic metadata regarding the run configuration, however, there is also the option to add customised run metadata to the results.yml in the tool_specific_configuration_options field. To achieve this, you'll need to create a construct_meta_data() method within your runner implementation. This method is responsible for appending customised metadata to the metadata object in the form of a defined dataclass. 
It should return the entire metadata object once the addition is completed. e.g., Defined customised metadata dataclass: from dataclasses import dataclass @dataclass class CustomisedMetaData : customised_field : str Example of implementation in the runner. from dataclasses import dataclass from pheval.runners.runner import PhEvalRunner from pathlib import Path @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" ) def construct_meta_data ( self ): \"\"\"Add metadata.\"\"\" self . meta_data . tool_specific_configuration_options = CustomisedMetaData ( customised_field = \"customised_value\" ) return self . meta_data 6. Test it. To update your custom pheval runner implementation, you must first install the package poetry install Now you have to be able to run PhEval passing your custom runner as parameter. e.g., pheval run -i ./input_dir -t ./test_data_dir -r 'customphevalrunner' -o output_dir The -r parameter stands for your plugin runner class name, and it must be entirely lowercase. Output: preparing running with custom pheval Runner post processing Pay attention to \" running with custom pheval Runner \" line, this is exactly what we had implemented in the CustomPhevalRunner Example","title":"Developing a PhEval Plugin"},{"location":"developing_a_pheval_plugin/#developing-a-pheval-plugin","text":"","title":"Developing a PhEval Plugin"},{"location":"developing_a_pheval_plugin/#description","text":"Plugin development allows PhEval to be extensible, as we have designed it. The plugin goal is to be flexible through custom runner implementations. 
This plugin development enhances the PhEval functionality. You can build one quickly using this step-by-step process. All custom Runners implementations must implement all PhevalRunner methods Bases: ABC PhEvalRunner Class Source code in src/pheval/runners/runner.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 @dataclass class PhEvalRunner ( ABC ): \"\"\"PhEvalRunner Class\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str directory_path = None input_dir_config = None _meta_data = None __raw_results_dir = \"raw_results/\" __pheval_gene_results_dir = \"pheval_gene_results/\" __pheval_variant_results_dir = \"pheval_variant_results/\" __pheval_disease_results_dir = \"pheval_disease_results/\" __tool_input_commands_dir = \"tool_input_commands/\" __run_meta_data_file = \"results.yml\" def __post_init__ ( self ): self . input_dir_config = parse_input_dir_config ( self . input_dir ) def _get_tool ( self ): return self . input_dir_config . tool def _get_variant_analysis ( self ): return self . input_dir_config . variant_analysis def _get_gene_analysis ( self ): return self . input_dir_config . gene_analysis def _get_disease_analysis ( self ): return self . input_dir_config . disease_analysis @property def tool_input_commands_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __tool_input_commands_dir ) @tool_input_commands_dir . setter def tool_input_commands_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def raw_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __raw_results_dir ) @raw_results_dir . 
setter def raw_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_gene_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_gene_results_dir ) @pheval_gene_results_dir . setter def pheval_gene_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_variant_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_variant_results_dir ) @pheval_variant_results_dir . setter def pheval_variant_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_disease_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_disease_results_dir ) @pheval_disease_results_dir . setter def pheval_disease_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True ) @property def meta_data ( self ): self . _meta_data = BasicOutputRunMetaData ( tool = self . input_dir_config . tool , tool_version = self . version , config = f \" { Path ( self . input_dir ) . parent . name } / { Path ( self . input_dir ) . name } \" , run_timestamp = datetime . now () . timestamp (), corpus = f \" { Path ( self . testdata_dir ) . parent . name } / { Path ( self . testdata_dir ) . name } \" , ) return self . _meta_data @meta_data . setter def meta_data ( self , meta_data ): self . 
_meta_data = meta_data @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" @abstractmethod def run ( self ): \"\"\"run\"\"\" @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data","title":"Description"},{"location":"developing_a_pheval_plugin/#step-by-step-plugin-development-process","text":"The plugin structure is derived from a cookiecutter template, Sphintoxetry-cookiecutter , and it uses Sphinx , tox and poetry as core dependencies. This allows PhEval extensibility to be standardized in terms of documentation and dependency management.","title":"Step-by-Step Plugin Development Process"},{"location":"developing_a_pheval_plugin/#1-sphintoxetry-cookiecutter-scaffold","text":"First, install the cruft package. Cruft enables keeping projects up-to-date with future updates made to this original template. Install the latest release of cruft from pip pip install cruft NOTE: You may encounter an error with the naming of the project layout if using an older release of cruft. To avoid this, make sure you have installed the latest release version. Next, create a project using the sphintoxetry-cookiecutter template. cruft create https://github.com/monarch-initiative/monarch-project-template","title":"1. Sphintoxetry-cookiecutter scaffold"},{"location":"developing_a_pheval_plugin/#2-further-setup","text":"","title":"2. 
Further setup"},{"location":"developing_a_pheval_plugin/#install-poetry-if-you-havent-already","text":"pip install poetry","title":"Install poetry if you haven't already."},{"location":"developing_a_pheval_plugin/#install-dependencies","text":"poetry install","title":"Install dependencies"},{"location":"developing_a_pheval_plugin/#add-pheval-dependency","text":"poetry add pheval","title":"Add PhEval dependency"},{"location":"developing_a_pheval_plugin/#run-tox-to-see-if-the-setup-works","text":"poetry run tox","title":"Run tox to see if the setup works"},{"location":"developing_a_pheval_plugin/#3-implement-pheval-custom-runner","text":"The runner name is arbitrary and custom Runner name was chose by demonstrative purposes Create a runner file inside the plugin project, e.g: \"\"\"Custom Pheval Runner.\"\"\" from dataclasses import dataclass from pathlib import Path from pheval.runners.runner import PhEvalRunner @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" )","title":"3. Implement PhEval Custom Runner"},{"location":"developing_a_pheval_plugin/#4-add-pheval-plugins-section-to-the-pyprojecttoml-file","text":"[tool.poetry.plugins. \"pheval.plugins\" ] customrunner = \"pheval_plugin_example.runner:CustomPhevalRunner\" Replace the value above with the path to your custom runner plugin","title":"4. Add PhEval Plugins section to the pyproject.toml file"},{"location":"developing_a_pheval_plugin/#5-implementing-pheval-helper-methods","text":"Streamlining the creation of your custom PhEval runner can be facilitated by leveraging PhEval's versatile helper methods, where applicable. 
Within PhEval, numerous public methods have been designed to assist in your runner methods. The utilisation of these helper methods is optional, yet they are crafted to enhance the overall implementation process.","title":"5. Implementing PhEval helper methods"},{"location":"developing_a_pheval_plugin/#utility-methods","text":"The PhenopacketUtil class is designed to aid in the collection of specific data from a Phenopacket. Class for retrieving data from a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 class PhenopacketUtil : \"\"\"Class for retrieving data from a Phenopacket or Family object\"\"\" def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): 
\"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . 
diseases def _diagnosis_from_interpretations ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the interpretations object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] interpretation = self . interpretations () for i in interpretation : ( diagnoses . append ( ProbandDisease ( disease_name = i . diagnosis . disease . label , disease_identifier = i . diagnosis . disease . id , ) ) if i . diagnosis . disease . label != \"\" and i . diagnosis . disease . id != \"\" else None ) return diagnoses def _diagnosis_from_disease ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the diseases object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] for disease in self . diseases (): diagnoses . append ( ProbandDisease ( disease_name = disease . term . label , disease_identifier = disease . term . id ) ) return diagnoses def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ())) def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . interpretations def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . 
interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . 
uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . name )) return vcf_data @staticmethod def _extract_diagnosed_gene ( genomic_interpretation : GenomicInterpretation , ) -> ProbandCausativeGene : \"\"\" Retrieve the disease causing genes from the variant descriptor field if not empty, otherwise, retrieves from the gene descriptor from a phenopacket. Args: genomic_interpretation (GenomicInterpretation): A genomic interpretation from a Phenopacket Returns: ProbandCausativeGene: The disease causing gene \"\"\" if genomic_interpretation . variant_interpretation . ByteSize () != 0 : return ProbandCausativeGene ( genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . symbol , genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . value_id , ) else : return ProbandCausativeGene ( gene_symbol = genomic_interpretation . gene . symbol , gene_identifier = genomic_interpretation . gene . value_id , ) def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . 
interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. 
Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False PhenopacketUtil proves particularly beneficial in scenarios where the tool for which you're crafting a runner implementation does not directly accept Phenopackets as inputs. Instead, it might require elements\u2014such as HPO IDs\u2014 via the command-line interface (CLI). In this context, leveraging PhenopacketUtil within the runner's preparation phase enables the extraction of observed phenotypic features from the Phenopacket input, facilitating seamless processing. An example of how this could be implemented is outlined here: from pheval.utils.phenopacket_utils import phenopacket_reader from pheval.utils.phenopacket_utils import PhenopacketUtil phenopacket = phenopacket_reader ( \"/path/to/phenopacket.json\" ) phenopacket_util = PhenopacketUtil ( phenopacket ) # To return a list of all observed phenotypes for a phenopacket observed_phenotypes = phenopacket_util . observed_phenotypic_features () # To extract just the HPO ID as a list observed_phenotypes_hpo_ids = [ observed_phenotype . id for observed_phenotype in observed_phenotypes ]","title":"Utility methods"},{"location":"developing_a_pheval_plugin/#additional-tool-specific-configurations","text":"For the pheval run command to execute successfully, a config.yaml should be found within the input directory supplied on the CLI. tool : tool_version : variant_analysis : gene_analysis : disease_analysis : tool_specific_configuration_options : The tool_specific_configuration_options is an optional field that can be populated with any variables specific to your runner implementation that is required for the running of your tool. All other fields are required to be filled in. The variant_analysis , gene_analysis , and disease_analysis are set as booleans and are for specifying what type of analysis/prioritisation the tool outputs. 
To populate the tool_specific_configuration_options with customised data, we suggest using the pydantic package as it can easily parse the data from the yaml structure. e.g., Define a BaseModel class with the fields that will populate the tool_specific_configuration_options from pydantic import BaseModel , Field class CustomisedConfigurations ( BaseModel ): \"\"\" Class for defining the customised configurations in tool_specific_configurations field, within the input_dir config.yaml Args: environment (str): Environment to run \"\"\" environment : str = Field ( ... ) Within your runner, parse the field into an object. from dataclasses import dataclass from pheval.runners.runner import PhEvalRunner from pathlib import Path @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) config = CustomisedConfigurations . parse_obj ( self . input_dir_config . tool_specific_configuration_options ) environment = config . environment def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" )","title":"Additional tool-specific configurations"},{"location":"developing_a_pheval_plugin/#post-processing-methods","text":"PhEval currently supports the benchmarking of gene, variant, and disease prioritisation results. To benchmark these result types, PhEval TSV result files need to be generated. PhEval can deal with the ranking and generation of these files to the correct location. However, the runner implementation must handle the extraction of essential data from the tool-specific raw results. This involves transforming them into a list comprising PhEval data classes, with each instance representing a result entry. 
The dataclasses representing essential information extracted from tool-specific output for gene, variant, and disease prioritisation are defined as follows: Bases: PhEvalResult Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 @dataclass class PhEvalGeneResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" gene_symbol : Union [ List [ str ], str ] gene_identifier : Union [ List [ str ], str ] score : float Bases: PhEvalResult Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. 
start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 @dataclass class PhEvalVariantResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" chromosome : str start : int end : int ref : str alt : str score : float Bases: PhEvalResult Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (float): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/post_processing/post_processing.py 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 @dataclass class PhEvalDiseaseResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (float): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str score : float The generate_pheval_result() can be implemented in your runner to write out the PhEval TSV results. An example of how the method can be called is outlined here: from pheval.post_processing.post_processing import generate_pheval_result generate_pheval_result ( pheval_result = pheval_gene_result , # this is the list of extracted PhEval result requirements sort_order_str = \"descending\" , # or can be ascending - this determines in which order the scores will be ranked output_dir = output_directory , # this can be accessed from the runner instance e.g., self.output_dir tool_result_path = tool_result_json # this is the path to the tool-specific raw results file )","title":"Post-processing methods"},{"location":"developing_a_pheval_plugin/#adding-metadata-to-the-resultsyml","text":"By default, PhEval will write a results.yml to the output directory supplied on the CLI. The results.yml contains basic metadata regarding the run configuration, however, there is also the option to add customised run metadata to the results.yml in the tool_specific_configuration_options field. To achieve this, you'll need to create a construct_meta_data() method within your runner implementation. 
This method is responsible for appending customised metadata to the metadata object in the form of a defined dataclass. It should return the entire metadata object once the addition is completed. e.g., Defined customised metadata dataclass: from dataclasses import dataclass @dataclass class CustomisedMetaData : customised_field : str Example of implementation in the runner. from dataclasses import dataclass from pheval.runners.runner import PhEvalRunner from pathlib import Path @dataclass class CustomPhevalRunner ( PhEvalRunner ): \"\"\"CustomPhevalRunner Class.\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): \"\"\"prepare method.\"\"\" print ( \"preparing\" ) def run ( self ): \"\"\"run method.\"\"\" print ( \"running with custom pheval runner\" ) def post_process ( self ): \"\"\"post_process method.\"\"\" print ( \"post processing\" ) def construct_meta_data ( self ): \"\"\"Add metadata.\"\"\" self . meta_data . tool_specific_configuration_options = CustomisedMetaData ( customised_field = \"customised_value\" ) return self . meta_data","title":"Adding metadata to the results.yml"},{"location":"developing_a_pheval_plugin/#6-test-it","text":"To update your custom pheval runner implementation, you must first install the package poetry install Now you should be able to run PhEval, passing your custom runner as a parameter. e.g., pheval run -i ./input_dir -t ./test_data_dir -r 'customphevalrunner' -o output_dir The -r parameter stands for your plugin runner class name, and it must be entirely lowercase. Output: preparing running with custom pheval runner post processing Pay attention to the \" running with custom pheval runner \" line; this is exactly what we implemented in the CustomPhevalRunner example","title":"6. Test it."},{"location":"exomiser_pipeline/","text":"PhEval Pipeline Exomiser Runner Step by Step to PhEval Run Pipeline (with ExomiserRunner) 1. 
Download Exomiser Software wget https://github.com/exomiser/Exomiser/releases/download/13.2.0/exomiser-cli-13.2.0-distribution.zip 2. Download Phenotype Data wget https://data.monarchinitiative.org/exomiser/latest/2302_hg19.zip wget https://data.monarchinitiative.org/exomiser/latest/2302_hg38.zip wget https://data.monarchinitiative.org/exomiser/latest/2302_phenotype.zip 3. Unzip data # unzip the distribution and data files - this will create a directory called 'exomiser-cli-13.2.0' in the current working directory unzip exomiser-cli-13.2.0-distribution.zip unzip 2302_hg19.zip -d exomiser-cli-13.2.0/data unzip 2302_hg38.zip -d exomiser-cli-13.2.0/data 4. Clone PhEval repo and follow steps described in Pipeline Documentation: git clone https://github.com/monarch-initiative/pheval.git cd pheval poetry shell poetry install pip install pheval.exomiser 5. Set PhEval Config YAML File directories : tmp : data/tmp exomiser : /path_where_exomiser_was_extracted phenotype : /path_where_phenotype_was_extracted workspace : /pheval's_path # path where pheval was cloned corpora : - id : small_test scrambled : - factor : 0.5 - factor : 0.7 custom_variants : - id : no_phenotype configs : - tool : exomiser version : 13.2.0 configuration : default exomiser_db : semsim1 runs : - tool : exomiser configuration : default corpus : small_test corpusvariant : scrambled-0.5 version : 13.2.0 6. Generate Makefile based on configuration bash ./resources/generatemakefile.sh 7. Exomiser Runner requires the following configuration The config.yaml file should be formatted like the example below and must be placed in exomiser: /path_where_exomiser_was_extracted declared in pheval-config.yaml file. tool : exomiser tool_version : 13.2.0 variant_analysis : True gene_analysis : True disease_analysis : True tool_specific_configuration_options : environment : local exomiser_software_directory : . 
analysis_configuration_file : preset-exome-analysis.yml max_jobs : 0 application_properties : remm_version : cadd_version : hg19_data_version : 2302 hg19_local_frequency_path : hg38_data_version : 2302 phenotype_data_version : 2302 cache_type : cache_caffeine_spec : post_process : score_name : combinedScore sort_order : DESCENDING 8. Preset Exome Analysis File Exomiser requires a preset-exome-analysis.yml file saved at /path_where_exomiser_was_extracted/preset-exome-analysis.yml This is an example of preset-exome-analysis.yml file ## Exomiser Analysis Template. # These are all the possible options for running exomiser. Use this as a template for # your own set-up. --- analysisMode : PASS_ONLY inheritanceModes : { AUTOSOMAL_DOMINANT : 0.1 , AUTOSOMAL_RECESSIVE_HOM_ALT : 0.1 , AUTOSOMAL_RECESSIVE_COMP_HET : 2.0 , X_DOMINANT : 0.1 , X_RECESSIVE_HOM_ALT : 0.1 , X_RECESSIVE_COMP_HET : 2.0 , MITOCHONDRIAL : 0.2 } frequencySources : [ THOUSAND_GENOMES , TOPMED , UK10K , ESP_AFRICAN_AMERICAN , ESP_EUROPEAN_AMERICAN , ESP_ALL , EXAC_AFRICAN_INC_AFRICAN_AMERICAN , EXAC_AMERICAN , EXAC_SOUTH_ASIAN , EXAC_EAST_ASIAN , EXAC_FINNISH , EXAC_NON_FINNISH_EUROPEAN , EXAC_OTHER , GNOMAD_E_AFR , GNOMAD_E_AMR , # GNOMAD_E_ASJ, GNOMAD_E_EAS , GNOMAD_E_FIN , GNOMAD_E_NFE , GNOMAD_E_OTH , GNOMAD_E_SAS , GNOMAD_G_AFR , GNOMAD_G_AMR , # GNOMAD_G_ASJ, GNOMAD_G_EAS , GNOMAD_G_FIN , GNOMAD_G_NFE , GNOMAD_G_OTH , GNOMAD_G_SAS ] # Possible pathogenicitySources: (POLYPHEN, MUTATION_TASTER, SIFT), (REVEL, MVP), CADD, REMM # REMM is trained on non-coding regulatory regions # *WARNING* if you enable CADD or REMM ensure that you have downloaded and installed the CADD/REMM tabix files # and updated their location in the application.properties. Exomiser will not run without this. pathogenicitySources : [ REVEL , MVP ] #this is the standard exomiser order. 
steps : [ failedVariantFilter : { }, variantEffectFilter : { remove : [ FIVE_PRIME_UTR_EXON_VARIANT , FIVE_PRIME_UTR_INTRON_VARIANT , THREE_PRIME_UTR_EXON_VARIANT , THREE_PRIME_UTR_INTRON_VARIANT , NON_CODING_TRANSCRIPT_EXON_VARIANT , NON_CODING_TRANSCRIPT_INTRON_VARIANT , CODING_TRANSCRIPT_INTRON_VARIANT , UPSTREAM_GENE_VARIANT , DOWNSTREAM_GENE_VARIANT , INTERGENIC_VARIANT , REGULATORY_REGION_VARIANT ] }, frequencyFilter : { maxFrequency : 2.0 }, pathogenicityFilter : { keepNonPathogenic : true }, inheritanceFilter : { }, omimPrioritiser : { }, hiPhivePrioritiser : { } ] 9. PhEval Run make pheval run","title":"PhEval Pipeline Exomiser Runner"},{"location":"exomiser_pipeline/#pheval-pipeline-exomiser-runner","text":"","title":"PhEval Pipeline Exomiser Runner"},{"location":"exomiser_pipeline/#step-by-step-to-pheval-run-pipeline-with-exomiserrunner","text":"","title":"Step by Step to PhEval Run Pipeline (with ExomiserRunner)"},{"location":"exomiser_pipeline/#1-download-exomiser-software","text":"wget https://github.com/exomiser/Exomiser/releases/download/13.2.0/exomiser-cli-13.2.0-distribution.zip","title":"1. Download Exomiser Software"},{"location":"exomiser_pipeline/#2-download-phenotype-data","text":"wget https://data.monarchinitiative.org/exomiser/latest/2302_hg19.zip wget https://data.monarchinitiative.org/exomiser/latest/2302_hg38.zip wget https://data.monarchinitiative.org/exomiser/latest/2302_phenotype.zip","title":"2. Download Phenotype Data"},{"location":"exomiser_pipeline/#3-unzip-data","text":"# unzip the distribution and data files - this will create a directory called 'exomiser-cli-13.2.0' in the current working directory unzip exomiser-cli-13.2.0-distribution.zip unzip 2302_hg19.zip -d exomiser-cli-13.2.0/data unzip 2302_hg38.zip -d exomiser-cli-13.2.0/data","title":"3. 
Unzip data"},{"location":"exomiser_pipeline/#4-clone-pheval-repo-and-follow-steps-described-in-pipeline-documentation","text":"git clone https://github.com/monarch-initiative/pheval.git cd pheval poetry shell poetry install pip install pheval.exomiser","title":"4. Clone PhEval repo and follow steps described in Pipeline Documentation:"},{"location":"exomiser_pipeline/#5-set-pheval-config-yaml-file","text":"directories : tmp : data/tmp exomiser : /path_where_exomiser_was_extracted phenotype : /path_where_phenotype_was_extracted workspace : /pheval's_path # path where pheval was cloned corpora : - id : small_test scrambled : - factor : 0.5 - factor : 0.7 custom_variants : - id : no_phenotype configs : - tool : exomiser version : 13.2.0 configuration : default exomiser_db : semsim1 runs : - tool : exomiser configuration : default corpus : small_test corpusvariant : scrambled-0.5 version : 13.2.0","title":"5. Set PhEval Config YAML File"},{"location":"exomiser_pipeline/#6-generate-makefile-based-on-configuration","text":"bash ./resources/generatemakefile.sh","title":"6. Generate Makefile based on configuration"},{"location":"exomiser_pipeline/#7-exomiser-runner-requires-the-following-configuration","text":"The config.yaml file should be formatted like the example below and must be placed in exomiser: /path_where_exomiser_was_extracted declared in pheval-config.yaml file. tool : exomiser tool_version : 13.2.0 variant_analysis : True gene_analysis : True disease_analysis : True tool_specific_configuration_options : environment : local exomiser_software_directory : . analysis_configuration_file : preset-exome-analysis.yml max_jobs : 0 application_properties : remm_version : cadd_version : hg19_data_version : 2302 hg19_local_frequency_path : hg38_data_version : 2302 phenotype_data_version : 2302 cache_type : cache_caffeine_spec : post_process : score_name : combinedScore sort_order : DESCENDING","title":"7. 
Exomiser Runner requires the following configuration"},{"location":"exomiser_pipeline/#8-preset-exome-analysis-file","text":"Exomiser requires a preset-exome-analysis.yml file saved at /path_where_exomiser_was_extracted/preset-exome-analysis.yml This is an example of preset-exome-analysis.yml file ## Exomiser Analysis Template. # These are all the possible options for running exomiser. Use this as a template for # your own set-up. --- analysisMode : PASS_ONLY inheritanceModes : { AUTOSOMAL_DOMINANT : 0.1 , AUTOSOMAL_RECESSIVE_HOM_ALT : 0.1 , AUTOSOMAL_RECESSIVE_COMP_HET : 2.0 , X_DOMINANT : 0.1 , X_RECESSIVE_HOM_ALT : 0.1 , X_RECESSIVE_COMP_HET : 2.0 , MITOCHONDRIAL : 0.2 } frequencySources : [ THOUSAND_GENOMES , TOPMED , UK10K , ESP_AFRICAN_AMERICAN , ESP_EUROPEAN_AMERICAN , ESP_ALL , EXAC_AFRICAN_INC_AFRICAN_AMERICAN , EXAC_AMERICAN , EXAC_SOUTH_ASIAN , EXAC_EAST_ASIAN , EXAC_FINNISH , EXAC_NON_FINNISH_EUROPEAN , EXAC_OTHER , GNOMAD_E_AFR , GNOMAD_E_AMR , # GNOMAD_E_ASJ, GNOMAD_E_EAS , GNOMAD_E_FIN , GNOMAD_E_NFE , GNOMAD_E_OTH , GNOMAD_E_SAS , GNOMAD_G_AFR , GNOMAD_G_AMR , # GNOMAD_G_ASJ, GNOMAD_G_EAS , GNOMAD_G_FIN , GNOMAD_G_NFE , GNOMAD_G_OTH , GNOMAD_G_SAS ] # Possible pathogenicitySources: (POLYPHEN, MUTATION_TASTER, SIFT), (REVEL, MVP), CADD, REMM # REMM is trained on non-coding regulatory regions # *WARNING* if you enable CADD or REMM ensure that you have downloaded and installed the CADD/REMM tabix files # and updated their location in the application.properties. Exomiser will not run without this. pathogenicitySources : [ REVEL , MVP ] #this is the standard exomiser order. 
steps : [ failedVariantFilter : { }, variantEffectFilter : { remove : [ FIVE_PRIME_UTR_EXON_VARIANT , FIVE_PRIME_UTR_INTRON_VARIANT , THREE_PRIME_UTR_EXON_VARIANT , THREE_PRIME_UTR_INTRON_VARIANT , NON_CODING_TRANSCRIPT_EXON_VARIANT , NON_CODING_TRANSCRIPT_INTRON_VARIANT , CODING_TRANSCRIPT_INTRON_VARIANT , UPSTREAM_GENE_VARIANT , DOWNSTREAM_GENE_VARIANT , INTERGENIC_VARIANT , REGULATORY_REGION_VARIANT ] }, frequencyFilter : { maxFrequency : 2.0 }, pathogenicityFilter : { keepNonPathogenic : true }, inheritanceFilter : { }, omimPrioritiser : { }, hiPhivePrioritiser : { } ]","title":"8. Preset Exome Analysis File"},{"location":"exomiser_pipeline/#9-pheval-run","text":"make pheval run","title":"9. PhEval Run"},{"location":"pipeline/","text":"PhEval Pipeline 1. Clone PhEval git clone https://github.com/monarch-initiative/pheval.git 2. Installing PhEval dependencies Enter in the cloned folder and enter the following commands: poetry shell poetry install 3. Generate custom Makefile You must have Jinja2 installed, if you don't follow the steps here In resources folder are the following files responsible for makefile generation: \ud83d\udce6resources \u2523 \ud83d\udcdcMakefile.j2 \u2523 \ud83d\udcdccustom.Makefile \u2523 \ud83d\udcdcgeneratemakefile.sh \u2517 \ud83d\udcdcpheval-config.yaml You must edit the pheval-config.yaml file setting the directory where you extracted exomiser and phenotype data. An example could be found here . 
After setting the pheval-config.yaml file flowchart TD inputs[\"prepare-inputs\"] sr1[\"Setting up Runners\"] corpora[\"prepare-corpora\"] scrambling[\"Scrambling Process\"] r1[\"run\"] inputs === sr1 sr1 === corpora corpora === scrambling scrambling === r1 Data Flow flowchart LR vcf[(\"Phenopackets Original Data\")] pheno[(\"Scrambled Phenopackets\")] result[\"Phenotype Result\"] vcf -- prepare-corpora --> pheno pheno -- scramble factor e.g 0.5 --> result Jinja Template PhEval Makefile Generator Requirements To generate a PhEval Makefile we use the Jinja template engine. Installing Jinja Template Linux (Ubuntu): sudo snap install j2 Mac OS: PhEval Makefile Template (.j2 file) \ud83d\udce6resources \u2523 \ud83d\udcdc Makefile.j2 custom.Makefile is the template that will be generated on the fly based on the pheval-config.yaml . Each of these configurations is filled using a syntax like this: {{ config.tool }} . The value between the curly brackets is replaced by the corresponding configuration in the configuration file. PhEval custom.Makefile \ud83d\udce6resources \u2523 \ud83d\udcdc custom.Makefile PhEval generatemakefile.sh \ud83d\udce6resources \u2523 \ud83d\udcdcgeneratemakefile.sh generatemakefile.sh is only a shortcut for Makefile rendering using the configuration file e.g. bash ./resources/generatemakefile.sh PhEval Configuration File In the resources folder, there is a file named pheval-config.yaml ; this file stores the configuration used for PhEval Makefile generation. 
\ud83d\udce6resources \u2517 \ud83d\udcdcpheval-config.yaml Directories Section directories : tmp : data/tmp h2jar : ./h2-1.4.199.jar phen2gene : ./Phen2Gene exomiser : /home/data/exomiser/exomiser-cli-13.2.0-distribution/exomiser-cli-13.2.0 phenotype : /home/data/phenotype workspace : /tmp/pheval Configs Section configs : - tool : phen2gene version : 1.2.3 configuration : default - tool : exomiser version : 13.2.0 configuration : default exomiser_db : semsim1 This section is responsible for setting up the configuration folder. All software declared in the configs section will be linked in this folder. In the configuration above, for example, we have one configuration for phen2gene and one for exomiser. In the Directories Section , these two configurations must have one corresponding property set up. PhEval pipeline invokes the prepare-inputs goal, and in the preceding example, a configuration folder structure will be built that looks like this: \ud83d\udce6configurations \u2523 \ud83d\udcc2exomiser-13.2.0-default \u2517 \ud83d\udcc2phen2gene-1.2.3-default Each of these folders is a symbolic link that points to the corresponding software folder indicated in the Directories Section Corpora Section corpora : - id : lirical scrambled : - factor : 0.5 - factor : 0.7 custom_variants : - id : no_phenotype - id : phen2gene scrambled : - factor : 0.2 - factor : 0.9 custom_variants : - id : no_phenotype In this corpora section we can set up different experiments for corpus scrambling. Currently, PhEval provides corpora data from lirical, phen2gene, small_test and structural_variants \ud83d\udce6corpora \u2523 \ud83d\udcc2lirical \u2523 \ud83d\udcc2phen2gene \u2523 \ud83d\udcc2small_test \u2517 \ud83d\udcc2structural_variants The scramble property defines the magnitude of the scrambling factor during Phenopackets and VCF variants spiking process. 
Using the configuration in the example above, a corpora structure will be created like this: \ud83d\udce6corpora \u2523 \ud83d\udcc2lirical \u2503 \u2517 \ud83d\udcc2default \u2503 \u2517 \ud83d\udcc2scrambled-0.5 \u2503 \u2517 \ud83d\udcc2scrambled-0.7 \u2523 \ud83d\udcc2phen2gene \u2503 \u2517 \ud83d\udcc2default \u2503 \u2517 \ud83d\udcc2scrambled-0.2 \u2503 \u2517 \ud83d\udcc2scrambled-0.9 Runs Section runs : - tool : exomiser configuration : default corpus : lirical corpusvariant : scrambled-0.5 version : 13.2.0 - tool : phen2gene configuration : default corpus : phen2gene corpusvariant : scrambled-0.2 version : 1.2.3 Phen2Gen Specific Configuration The input directory config.yaml should be formatted like the example below and must be placed in phen2gene: /pathtoPhen2Gene/Phen2Gene declared in pheval-config.yaml file. tool : phen2gene tool_version : 1.2.3 phenotype_only : True tool_specific_configuration_options : environment : local phen2gene_python_executable : phen2gene.py post_process : score_order : descending Makefile Goals make pheval this runs the entire pipeline including corpus preparation and pheval run $(MAKE) prepare-inputs $(MAKE) prepare-corpora $(MAKE) pheval-run make semsim generate all configured similarity profiles make semsim-shuffle generate new ontology terms to the semsim process make semsim-scramble scramble semsim profile make semsim-convert convert all semsim profiles into exomiser SQL format make semsim-ingest takes all the configured semsim profiles and loads them into the exomiser databases","title":"PhEval Pipeline"},{"location":"pipeline/#pheval-pipeline","text":"","title":"PhEval Pipeline"},{"location":"pipeline/#1-clone-pheval","text":"git clone https://github.com/monarch-initiative/pheval.git","title":"1. Clone PhEval"},{"location":"pipeline/#2-installing-pheval-dependencies","text":"Enter in the cloned folder and enter the following commands: poetry shell poetry install","title":"2. 
Installing PhEval dependencies"},{"location":"pipeline/#3-generate-custom-makefile","text":"You must have Jinja2 installed; if you don't, follow the steps here . In the resources folder are the following files responsible for makefile generation: \ud83d\udce6resources \u2523 \ud83d\udcdcMakefile.j2 \u2523 \ud83d\udcdccustom.Makefile \u2523 \ud83d\udcdcgeneratemakefile.sh \u2517 \ud83d\udcdcpheval-config.yaml You must edit the pheval-config.yaml file setting the directory where you extracted exomiser and phenotype data. An example can be found here . After setting the pheval-config.yaml file flowchart TD inputs[\"prepare-inputs\"] sr1[\"Setting up Runners\"] corpora[\"prepare-corpora\"] scrambling[\"Scrambling Process\"] r1[\"run\"] inputs === sr1 sr1 === corpora corpora === scrambling scrambling === r1","title":"3. Generate custom Makefile"},{"location":"pipeline/#data-flow","text":"flowchart LR vcf[(\"Phenopackets Original Data\")] pheno[(\"Scrambled Phenopackets\")] result[\"Phenotype Result\"] vcf -- prepare-corpora --> pheno pheno -- scramble factor e.g 0.5 --> result","title":"Data Flow"},{"location":"pipeline/#jinja-template-pheval-makefile-generator-requirements","text":"To generate a PhEval Makefile we use the Jinja template engine.","title":"Jinja Template PhEval Makefile Generator Requirements"},{"location":"pipeline/#installing-jinja-template","text":"Linux (Ubuntu): sudo snap install j2 Mac OS:","title":"Installing Jinja Template"},{"location":"pipeline/#pheval-makefile-template-j2-file","text":"\ud83d\udce6resources \u2523 \ud83d\udcdc Makefile.j2 custom.Makefile is the template that will be generated on the fly based on the pheval-config.yaml . Each of these configurations is filled using a syntax like this: {{ config.tool }} . 
The value between the curly brackets is replaced by the corresponding configuration in the configuration file.","title":"PhEval Makefile Template (.j2 file)"},{"location":"pipeline/#pheval-custommakefile","text":"\ud83d\udce6resources \u2523 \ud83d\udcdc custom.Makefile","title":"PhEval custom.Makefile"},{"location":"pipeline/#pheval-generatemakefilesh","text":"\ud83d\udce6resources \u2523 \ud83d\udcdcgeneratemakefile.sh generatemakefile.sh is only a shortcut for Makefile rendering using the configuration file e.g. bash ./resources/generatemakefile.sh","title":"PhEval generatemakefile.sh"},{"location":"pipeline/#pheval-configuration-file","text":"In resources folder, there is a file named pheval-config.yaml , this file is responsible for storing the PhEval Makefile generation. \ud83d\udce6resources \u2517 \ud83d\udcdcpheval-config.yaml","title":"PhEval Configuration File"},{"location":"pipeline/#directories-section","text":"directories : tmp : data/tmp h2jar : ./h2-1.4.199.jar phen2gene : ./Phen2Gene exomiser : /home/data/exomiser/exomiser-cli-13.2.0-distribution/exomiser-cli-13.2.0 phenotype : /home/data/phenotype workspace : /tmp/pheval","title":"Directories Section"},{"location":"pipeline/#configs-section","text":"configs : - tool : phen2gene version : 1.2.3 configuration : default - tool : exomiser version : 13.2.0 configuration : default exomiser_db : semsim1 This section is responsible for setting up the configuration folder. All software declared in the configs section will be linked in this folder. In the configuration above, for example, we have one configuration for phen2gene and one for exomiser. In the Directories Section , these two configurations must have one corresponding property set up. 
PhEval pipeline invokes the prepare-inputs goal, and in the preceding example, a configuration folder structure will be built that looks like this: \ud83d\udce6configurations \u2523 \ud83d\udcc2exomiser-13.2.0-default \u2517 \ud83d\udcc2phen2gene-1.2.3-default Each of these folders is a symbolic link that points to the corresponding software folder indicated in the Directories Section","title":"Configs Section"},{"location":"pipeline/#corpora-section","text":"corpora : - id : lirical scrambled : - factor : 0.5 - factor : 0.7 custom_variants : - id : no_phenotype - id : phen2gene scrambled : - factor : 0.2 - factor : 0.9 custom_variants : - id : no_phenotype In this corpora section we can set up different experiments for corpus scrambling. Currently, PhEval provides corpora data from lirical, phen2gene, small_test and structural_variants \ud83d\udce6corpora \u2523 \ud83d\udcc2lirical \u2523 \ud83d\udcc2phen2gene \u2523 \ud83d\udcc2small_test \u2517 \ud83d\udcc2structural_variants The scramble property defines the magnitude of the scrambling factor during Phenopackets and VCF variants spiking process. 
Using the configuration in the example above, a corpora structure will be created like this: \ud83d\udce6corpora \u2523 \ud83d\udcc2lirical \u2503 \u2517 \ud83d\udcc2default \u2503 \u2517 \ud83d\udcc2scrambled-0.5 \u2503 \u2517 \ud83d\udcc2scrambled-0.7 \u2523 \ud83d\udcc2phen2gene \u2503 \u2517 \ud83d\udcc2default \u2503 \u2517 \ud83d\udcc2scrambled-0.2 \u2503 \u2517 \ud83d\udcc2scrambled-0.9","title":"Corpora Section"},{"location":"pipeline/#runs-section","text":"runs : - tool : exomiser configuration : default corpus : lirical corpusvariant : scrambled-0.5 version : 13.2.0 - tool : phen2gene configuration : default corpus : phen2gene corpusvariant : scrambled-0.2 version : 1.2.3","title":"Runs Section"},{"location":"pipeline/#phen2gen-specific-configuration","text":"The input directory config.yaml should be formatted like the example below and must be placed in phen2gene: /pathtoPhen2Gene/Phen2Gene declared in pheval-config.yaml file. tool : phen2gene tool_version : 1.2.3 phenotype_only : True tool_specific_configuration_options : environment : local phen2gene_python_executable : phen2gene.py post_process : score_order : descending","title":"Phen2Gen Specific Configuration"},{"location":"pipeline/#makefile-goals","text":"","title":"Makefile Goals"},{"location":"pipeline/#make-pheval","text":"this runs the entire pipeline including corpus preparation and pheval run $(MAKE) prepare-inputs $(MAKE) prepare-corpora $(MAKE) pheval-run","title":"make pheval"},{"location":"pipeline/#make-semsim","text":"generate all configured similarity profiles","title":"make semsim"},{"location":"pipeline/#make-semsim-shuffle","text":"generate new ontology terms to the semsim process","title":"make semsim-shuffle"},{"location":"pipeline/#make-semsim-scramble","text":"scramble semsim profile","title":"make semsim-scramble"},{"location":"pipeline/#make-semsim-convert","text":"convert all semsim profiles into exomiser SQL format","title":"make 
semsim-convert"},{"location":"pipeline/#make-semsim-ingest","text":"takes all the configured semsim profiles and loads them into the exomiser databases","title":"make semsim-ingest"},{"location":"plugins/","text":"A full list of implemented PhEval runners are listed below along with links to the original tool: Tool PhEval plugin Comment Exomiser Exomiser runner The link to the original tool can be found here Phen2Gene Phen2Gene runner The link to the original tool can be found here LIRICAL LIRICAL runner The link to the original tool can be found here SvAnna SvAnna runner The link to the original tool can be found here GADO GADO runner The link to the original tool can be found here Template Template runner OntoGPT OntoGPT runner ELDER ELDER runner MALCO MALCO runner AI MARRVEL AI MARRVEL runner The link to the original tool can be found here OAK OAK runner","title":"Plugins"},{"location":"roadmap/","text":"Roadmap The Roadmap is a rough plan, changes are expected throughout the year. 2023 Q1 Finalising the PhEval architecture (draft is done) End-to-end pipeline for testing PhEval with Exomiser and two versions of HPO Submitting a poster to Biocuration which outlines the full vision Q2 Focus on an analytic framework around PhEval, focusing on studying how changes to ontologies affect changes in variant prioritisation Extend phenotype pipeline to enable base releases and alternative patterns Q3 Improving the analytic framework of PhEval, especially phenotype analysis All intermediate files of pipeline have a corresponding LinkML model Focus on studying the effect of KG snippets (p2ds) on VP performance Q4 Drafting a PhEval paper Building standalone pipeline that reports changes in algorithm behaviours to ontology developers.","title":"Roadmap"},{"location":"roadmap/#roadmap","text":"The Roadmap is a rough plan, changes are expected throughout the 
year.","title":"Roadmap"},{"location":"roadmap/#2023","text":"","title":"2023"},{"location":"roadmap/#q1","text":"Finalising the PhEval architecture (draft is done) End-to-end pipeline for testing PhEval with Exomiser and two versions of HPO Submitting a poster to Biocuration which outlines the full vision","title":"Q1"},{"location":"roadmap/#q2","text":"Focus on an analytic framework around PhEval, focusing on studying how changes to ontologies affect changes in variant prioritisation Extend phenotype pipeline to enable base releases and alternative patterns","title":"Q2"},{"location":"roadmap/#q3","text":"Improving the analytic framework of PhEval, especially phenotype analysis All intermediate files of pipeline have a corresponding LinkML model Focus on studying the effect of KG snippets (p2ds) on VP performance","title":"Q3"},{"location":"roadmap/#q4","text":"Drafting a PhEval paper Building standalone pipeline that reports changes in algorithm behaviours to ontology developers.","title":"Q4"},{"location":"styleguide/","text":"Monarch Style Guide for PhEval No code in CLI methods","title":"Monarch Style Guide for PhEval"},{"location":"styleguide/#monarch-style-guide-for-pheval","text":"No code in CLI methods","title":"Monarch Style Guide for PhEval"},{"location":"api/pheval/cli/","text":"main main CLI method for PhEval Args: verbose (int, optional): Verbose flag. quiet (bool, optional): Queit Flag. Usage: main [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default -v , --verbose integer range ( 0 and above) N/A 0 -q , --quiet text N/A None --help boolean Show this message and exit. False pheval pheval Usage: pheval [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default --help boolean Show this message and exit. 
False Subcommands run : PhEval Runner Command Line Interface run PhEval Runner Command Line Interface Args: input_dir (Path): The input directory (relative path: e.g exomiser-13.11) testdata_dir (Path): The input directory (relative path: e.g ./data runner (str): Runner implementation (e.g exomiser-13.11) tmp_dir (Path): The path of the temporary directory (optional) output_dir (Path): The path of the output directory config (Path): The path of the configuration file (optional e.g., config.yaml) version (str): The version of the tool implementation Usage: pheval run [OPTIONS] Options: Name Type Description Default --input-dir , -i Path The input directory (relative path: e.g exomiser-13.11) _required --testdata-dir , -t Path The input directory (relative path: e.g ./data) _required --runner , -r text Runner implementation (e.g exomiser-13.11) _required --tmp-dir , -m Path The path of the temporary directory (optional) None --output-dir , -o Path The path of the output directory _required --config , -c Path The path of the configuration file (optional e.g config.yaml) None --version , -v text Version of the tool implementation. None --help boolean Show this message and exit. False pheval-utils pheval_utils Usage: pheval-utils [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default --help boolean Show this message and exit. False Subcommands benchmark : Benchmark the gene/variant/disease prioritisation performance for a single run. benchmark-comparison : Benchmark the gene/variant/disease prioritisation performance for two runs. create-spiked-vcfs : generate-stats-plot : Generate bar plot from benchmark stats summary tsv. prepare-corpus : scramble-phenopackets : Generate noisy phenopackets from existing ones. semsim-scramble : Scrambles semsim profile multiplying score value by scramble factor semsim-to-exomiserdb : ingests semsim file into exomiser phenotypic database update-phenopackets : Update gene symbols and identifiers for phenopackets. 
benchmark Benchmark the gene/variant/disease prioritisation performance for a single run. Usage: pheval-utils benchmark [OPTIONS] Options: Name Type Description Default --directory , -d Path General results directory to be benchmarked, assumes contains subdirectories of pheval_gene_results/,pheval_variant_results/ or pheval_disease_results/. _required --phenopacket-dir , -p Path Full path to directory containing input phenopackets. _required --output-prefix , -o text Output file prefix. _required --score-order , -so choice ( ascending | descending ) Ordering of results for ranking. descending --threshold , -t float Score threshold. 0.0 --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_stacked --help boolean Show this message and exit. False benchmark-comparison Benchmark the gene/variant/disease prioritisation performance for two runs. Usage: pheval-utils benchmark-comparison [OPTIONS] Options: Name Type Description Default --run-data , -r Path Path to .txt file containing testdata phenopacket directory and corresponding results directory separated by tab.Each run contained to a new line with the input testdata listed first and on the same line separated by a tabthe results directory. _required --output-prefix , -o text Output file prefix. _required --score-order , -so choice ( ascending | descending ) Ordering of results for ranking. descending --threshold , -t float Score threshold. 
0.0 --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_cumulative --help boolean Show this message and exit. False create-spiked-vcfs Create spiked VCF from either a Phenopacket or a Phenopacket directory. Args: phenopacket_path (Path): Path to a single Phenopacket file (optional). phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional). output_dir (Path): The directory to store the generated spiked VCF file(s). hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional). Usage: pheval-utils create-spiked-vcfs [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopacket directory for updating. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --hg19-template-vcf , -hg19 Path Template hg19 VCF file NOTE: This argument is mutually exclusive with arguments: [hg19_vcf_dir]. None --hg38-template-vcf , -hg38 Path Template hg38 VCF file NOTE: This argument is mutually exclusive with arguments: [hg38_vcf_dir]. None --hg19-vcf-dir , -hg19-dir Path Path to directory containing hg19 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg19_template_vcf]. 
None --hg38-vcf-dir , -hg38-dir Path Path to directory containing hg38 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg38_template_vcf]. None --output-dir , -O Path Path for creation of output directory vcf --help boolean Show this message and exit. False generate-stats-plot Generate bar plot from benchmark stats summary tsv. Usage: pheval-utils generate-stats-plot [OPTIONS] Options: Name Type Description Default --benchmarking-tsv , -b Path Path to benchmark summary tsv output by PhEval benchmark commands. _required --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation NOTE: This argument is mutually exclusive with arguments: [disease_analysis, variant_analysis]. False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation NOTE: This argument is mutually exclusive with arguments: [disease_analysis, gene_analysis]. False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation NOTE: This argument is mutually exclusive with arguments: [variant_analysis, gene_analysis]. False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_cumulative --title , -t text Title for plot, specify the title on the CLI enclosed with \"\" None --help boolean Show this message and exit. False prepare-corpus Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Args: phenopacket_dir (Path): The path to the directory containing Phenopackets. variant_analysis (bool): If True, check for complete variant records in the Phenopackets. gene_analysis (bool): If True, check for complete gene records in the Phenopackets. disease_analysis (bool): If True, check for complete disease records in the Phenopackets. gene_identifier (str): Identifier for updating gene identifiers, if applicable. 
hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional). output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files. Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. Usage: pheval-utils prepare-corpus [OPTIONS] Options: Name Type Description Default --phenopacket-dir , -p Path Path to phenopacket corpus directory.. _required --variant-analysis / --no-variant-analysis boolean Specify whether to check for complete variant records in the phenopackets. False --gene-analysis / --no-gene-analysis boolean Specify whether to check for complete gene records in the phenopackets. False --disease-analysis / --no-disease-analysis boolean Specify whether to check for complete disease records in the phenopackets. False --gene-identifier , -g choice ( ensembl_id | entrez_id | hgnc_id ) Gene identifier to update in phenopacket None --hg19-template-vcf , -hg19 Path Template hg19 VCF file NOTE: This argument is mutually exclusive with arguments: [hg19_vcf_dir]. None --hg38-template-vcf , -hg38 Path Template hg38 VCF file NOTE: This argument is mutually exclusive with arguments: [hg38_vcf_dir]. None --hg19-vcf-dir , -hg19-dir Path Path to directory containing hg19 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg19_template_vcf]. None --hg38-vcf-dir , -hg38-dir Path Path to directory containing hg38 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg38_template_vcf]. None --output-dir , -o Path Path to output prepared corpus. prepared_corpus --help boolean Show this message and exit. 
False scramble-phenopackets Generate noisy phenopackets from existing ones. Usage: pheval-utils scramble-phenopackets [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopackets directory. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --scramble-factor , -s float Scramble factor for randomising phenopacket phenotypic profiles. 0.5 --output-dir , -O Path Path for creation of output directory noisy_phenopackets --help boolean Show this message and exit. False semsim-scramble Scrambles semsim profile multiplying score value by scramble factor Args: input (Path): Path file that points out to the semsim profile output (Path): Path file that points out to the output file score_column (List[str]): Score column(s) that will be scrambled scramble_factor (float): Scramble Magnitude Usage: pheval-utils semsim-scramble [OPTIONS] Options: Name Type Description Default --input , -i Path Path to the semantic similarity profile to be scrambled. _required --output , -o Path Path where the scrambled semsim file will be written. _required --score-column , -c choice ( jaccard_similarity | dice_similarity | phenodigm_score ) Score column that will be scrambled _required --scramble-factor , -s float Scramble Magnitude (noise) that will be applied to semantic similarity score column (e.g. jaccard similarity). 0.5 --help boolean Show this message and exit. False semsim-to-exomiserdb ingests semsim file into exomiser phenotypic database Args: input_file (Path): semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv object_prefix (str): object prefix. e.g. MP subject_prefix (str): subject prefix e.g HP db_path (Path): Exomiser Phenotypic Database Folder Path. (e.g. 
/exomiser_folder/2209_phenotype/2209_phenotype/) Usage: pheval-utils semsim-to-exomiserdb [OPTIONS] Options: Name Type Description Default --input-file , -i Path Semsim input file. _required --object-prefix text Object Prefix. e.g. MP _required --subject-prefix text Subject Prefix. e.g. HP _required --db-path , -d Path Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/). This is the path where the phenotypic database folder will be written out. _required --help boolean Show this message and exit. False update-phenopackets Update gene symbols and identifiers for phenopackets. Usage: pheval-utils update-phenopackets [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopacket directory for updating. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --output-dir , -o Path Path to write phenopacket. _required --gene-identifier , -g choice ( ensembl_id | entrez_id | hgnc_id ) Gene identifier to add to phenopacket ensembl_id --help boolean Show this message and exit. False","title":"Cli"},{"location":"api/pheval/cli/#main","text":"main CLI method for PhEval Args: verbose (int, optional): Verbose flag. quiet (bool, optional): Queit Flag. Usage: main [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default -v , --verbose integer range ( 0 and above) N/A 0 -q , --quiet text N/A None --help boolean Show this message and exit. False","title":"main"},{"location":"api/pheval/cli/#pheval","text":"pheval Usage: pheval [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default --help boolean Show this message and exit. 
False Subcommands run : PhEval Runner Command Line Interface","title":"pheval"},{"location":"api/pheval/cli/#run","text":"PhEval Runner Command Line Interface Args: input_dir (Path): The input directory (relative path: e.g exomiser-13.11) testdata_dir (Path): The input directory (relative path: e.g ./data runner (str): Runner implementation (e.g exomiser-13.11) tmp_dir (Path): The path of the temporary directory (optional) output_dir (Path): The path of the output directory config (Path): The path of the configuration file (optional e.g., config.yaml) version (str): The version of the tool implementation Usage: pheval run [OPTIONS] Options: Name Type Description Default --input-dir , -i Path The input directory (relative path: e.g exomiser-13.11) _required --testdata-dir , -t Path The input directory (relative path: e.g ./data) _required --runner , -r text Runner implementation (e.g exomiser-13.11) _required --tmp-dir , -m Path The path of the temporary directory (optional) None --output-dir , -o Path The path of the output directory _required --config , -c Path The path of the configuration file (optional e.g config.yaml) None --version , -v text Version of the tool implementation. None --help boolean Show this message and exit. False","title":"run"},{"location":"api/pheval/cli/#pheval-utils","text":"pheval_utils Usage: pheval-utils [OPTIONS] COMMAND [ARGS]... Options: Name Type Description Default --help boolean Show this message and exit. False Subcommands benchmark : Benchmark the gene/variant/disease prioritisation performance for a single run. benchmark-comparison : Benchmark the gene/variant/disease prioritisation performance for two runs. create-spiked-vcfs : generate-stats-plot : Generate bar plot from benchmark stats summary tsv. prepare-corpus : scramble-phenopackets : Generate noisy phenopackets from existing ones. 
semsim-scramble : Scrambles semsim profile multiplying score value by scramble factor semsim-to-exomiserdb : ingests semsim file into exomiser phenotypic database update-phenopackets : Update gene symbols and identifiers for phenopackets.","title":"pheval-utils"},{"location":"api/pheval/cli/#benchmark","text":"Benchmark the gene/variant/disease prioritisation performance for a single run. Usage: pheval-utils benchmark [OPTIONS] Options: Name Type Description Default --directory , -d Path General results directory to be benchmarked, assumes contains subdirectories of pheval_gene_results/,pheval_variant_results/ or pheval_disease_results/. _required --phenopacket-dir , -p Path Full path to directory containing input phenopackets. _required --output-prefix , -o text Output file prefix. _required --score-order , -so choice ( ascending | descending ) Ordering of results for ranking. descending --threshold , -t float Score threshold. 0.0 --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_stacked --help boolean Show this message and exit. False","title":"benchmark"},{"location":"api/pheval/cli/#benchmark-comparison","text":"Benchmark the gene/variant/disease prioritisation performance for two runs. Usage: pheval-utils benchmark-comparison [OPTIONS] Options: Name Type Description Default --run-data , -r Path Path to .txt file containing testdata phenopacket directory and corresponding results directory separated by tab.Each run contained to a new line with the input testdata listed first and on the same line separated by a tabthe results directory. _required --output-prefix , -o text Output file prefix. 
_required --score-order , -so choice ( ascending | descending ) Ordering of results for ranking. descending --threshold , -t float Score threshold. 0.0 --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_cumulative --help boolean Show this message and exit. False","title":"benchmark-comparison"},{"location":"api/pheval/cli/#create-spiked-vcfs","text":"Create spiked VCF from either a Phenopacket or a Phenopacket directory. Args: phenopacket_path (Path): Path to a single Phenopacket file (optional). phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional). output_dir (Path): The directory to store the generated spiked VCF file(s). hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional). Usage: pheval-utils create-spiked-vcfs [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopacket directory for updating. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --hg19-template-vcf , -hg19 Path Template hg19 VCF file NOTE: This argument is mutually exclusive with arguments: [hg19_vcf_dir]. None --hg38-template-vcf , -hg38 Path Template hg38 VCF file NOTE: This argument is mutually exclusive with arguments: [hg38_vcf_dir]. 
None --hg19-vcf-dir , -hg19-dir Path Path to directory containing hg19 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg19_template_vcf]. None --hg38-vcf-dir , -hg38-dir Path Path to directory containing hg38 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg38_template_vcf]. None --output-dir , -O Path Path for creation of output directory vcf --help boolean Show this message and exit. False","title":"create-spiked-vcfs"},{"location":"api/pheval/cli/#generate-stats-plot","text":"Generate bar plot from benchmark stats summary tsv. Usage: pheval-utils generate-stats-plot [OPTIONS] Options: Name Type Description Default --benchmarking-tsv , -b Path Path to benchmark summary tsv output by PhEval benchmark commands. _required --gene-analysis / --no-gene-analysis boolean Specify analysis for gene prioritisation NOTE: This argument is mutually exclusive with arguments: [disease_analysis, variant_analysis]. False --variant-analysis / --no-variant-analysis boolean Specify analysis for variant prioritisation NOTE: This argument is mutually exclusive with arguments: [disease_analysis, gene_analysis]. False --disease-analysis / --no-disease-analysis boolean Specify analysis for disease prioritisation NOTE: This argument is mutually exclusive with arguments: [variant_analysis, gene_analysis]. False --plot-type , -y choice ( bar_stacked | bar_cumulative | bar_non_cumulative ) Bar chart type to output. bar_cumulative --title , -t text Title for plot, specify the title on the CLI enclosed with \"\" None --help boolean Show this message and exit. False","title":"generate-stats-plot"},{"location":"api/pheval/cli/#prepare-corpus","text":"Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Args: phenopacket_dir (Path): The path to the directory containing Phenopackets. variant_analysis (bool): If True, check for complete variant records in the Phenopackets. 
gene_analysis (bool): If True, check for complete gene records in the Phenopackets. disease_analysis (bool): If True, check for complete disease records in the Phenopackets. gene_identifier (str): Identifier for updating gene identifiers, if applicable. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): Path to the directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing the hg38 VCF files (optional). output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files. Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. Usage: pheval-utils prepare-corpus [OPTIONS] Options: Name Type Description Default --phenopacket-dir , -p Path Path to phenopacket corpus directory.. _required --variant-analysis / --no-variant-analysis boolean Specify whether to check for complete variant records in the phenopackets. False --gene-analysis / --no-gene-analysis boolean Specify whether to check for complete gene records in the phenopackets. False --disease-analysis / --no-disease-analysis boolean Specify whether to check for complete disease records in the phenopackets. False --gene-identifier , -g choice ( ensembl_id | entrez_id | hgnc_id ) Gene identifier to update in phenopacket None --hg19-template-vcf , -hg19 Path Template hg19 VCF file NOTE: This argument is mutually exclusive with arguments: [hg19_vcf_dir]. None --hg38-template-vcf , -hg38 Path Template hg38 VCF file NOTE: This argument is mutually exclusive with arguments: [hg38_vcf_dir]. None --hg19-vcf-dir , -hg19-dir Path Path to directory containing hg19 VCF templates. NOTE: This argument is mutually exclusive with arguments: [hg19_template_vcf]. None --hg38-vcf-dir , -hg38-dir Path Path to directory containing hg38 VCF templates. 
NOTE: This argument is mutually exclusive with arguments: [hg38_template_vcf]. None --output-dir , -o Path Path to output prepared corpus. prepared_corpus --help boolean Show this message and exit. False","title":"prepare-corpus"},{"location":"api/pheval/cli/#scramble-phenopackets","text":"Generate noisy phenopackets from existing ones. Usage: pheval-utils scramble-phenopackets [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopackets directory. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --scramble-factor , -s float Scramble factor for randomising phenopacket phenotypic profiles. 0.5 --output-dir , -O Path Path for creation of output directory noisy_phenopackets --help boolean Show this message and exit. False","title":"scramble-phenopackets"},{"location":"api/pheval/cli/#semsim-scramble","text":"Scrambles semsim profile multiplying score value by scramble factor Args: input (Path): Path file that points out to the semsim profile output (Path): Path file that points out to the output file score_column (List[str]): Score column(s) that will be scrambled scramble_factor (float): Scramble Magnitude Usage: pheval-utils semsim-scramble [OPTIONS] Options: Name Type Description Default --input , -i Path Path to the semantic similarity profile to be scrambled. _required --output , -o Path Path where the scrambled semsim file will be written. _required --score-column , -c choice ( jaccard_similarity | dice_similarity | phenodigm_score ) Score column that will be scrambled _required --scramble-factor , -s float Scramble Magnitude (noise) that will be applied to semantic similarity score column (e.g. jaccard similarity). 0.5 --help boolean Show this message and exit. 
False","title":"semsim-scramble"},{"location":"api/pheval/cli/#semsim-to-exomiserdb","text":"ingests semsim file into exomiser phenotypic database Args: input_file (Path): semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv object_prefix (str): object prefix. e.g. MP subject_prefix (str): subject prefix e.g HP db_path (Path): Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/) Usage: pheval-utils semsim-to-exomiserdb [OPTIONS] Options: Name Type Description Default --input-file , -i Path Semsim input file. _required --object-prefix text Object Prefix. e.g. MP _required --subject-prefix text Subject Prefix. e.g. HP _required --db-path , -d Path Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/). This is the path where the phenotypic database folder will be written out. _required --help boolean Show this message and exit. False","title":"semsim-to-exomiserdb"},{"location":"api/pheval/cli/#update-phenopackets","text":"Update gene symbols and identifiers for phenopackets. Usage: pheval-utils update-phenopackets [OPTIONS] Options: Name Type Description Default --phenopacket-path , -p Path Path to phenopacket. NOTE: This argument is mutually exclusive with arguments: [phenopacket_dir]. None --phenopacket-dir , -P Path Path to phenopacket directory for updating. NOTE: This argument is mutually exclusive with arguments: [phenopacket_path]. None --output-dir , -o Path Path to write phenopacket. _required --gene-identifier , -g choice ( ensembl_id | entrez_id | hgnc_id ) Gene identifier to add to phenopacket ensembl_id --help boolean Show this message and exit. False","title":"update-phenopackets"},{"location":"api/pheval/config_parser/","text":"InputDirConfig dataclass Class for defining the fields within the input directory config. Parameters: Name Type Description Default tool str Name of the tool implementation (e.g. 
exomiser/phen2gene) required tool_version str Version of the tool implementation required variant_analysis bool Whether to extract prioritised variants from results. required gene_analysis bool Whether to extract prioritised genes from results. required disease_analysis bool Whether to extract prioritised diseases from results. required tool_specific_configuration_options Any Tool specific configurations required Source code in src/pheval/config_parser.py 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 @serde @dataclass class InputDirConfig : \"\"\" Class for defining the fields within the input directory config. Args: tool (str): Name of the tool implementation (e.g. exomiser/phen2gene) tool_version (str): Version of the tool implementation variant_analysis (bool): Whether to extract prioritised variants from results. gene_analysis (bool): Whether to extract prioritised genes from results. disease_analysis (bool): Whether to extract prioritised diseases from results. tool_specific_configuration_options (Any): Tool specific configurations \"\"\" tool : str tool_version : str variant_analysis : bool gene_analysis : bool disease_analysis : bool tool_specific_configuration_options : Any parse_input_dir_config ( input_dir ) Reads the config file. Source code in src/pheval/config_parser.py 35 36 37 38 39 40 def parse_input_dir_config ( input_dir : Path ) -> InputDirConfig : \"\"\"Reads the config file.\"\"\" with open ( Path ( input_dir ) . joinpath ( \"config.yaml\" ), \"r\" ) as config_file : config = yaml . safe_load ( config_file ) config_file . close () return from_yaml ( InputDirConfig , yaml . dump ( config ))","title":"Config parser"},{"location":"api/pheval/config_parser/#src.pheval.config_parser.InputDirConfig","text":"Class for defining the fields within the input directory config. Parameters: Name Type Description Default tool str Name of the tool implementation (e.g. 
exomiser/phen2gene) required tool_version str Version of the tool implementation required variant_analysis bool Whether to extract prioritised variants from results. required gene_analysis bool Whether to extract prioritised genes from results. required disease_analysis bool Whether to extract prioritised diseases from results. required tool_specific_configuration_options Any Tool specific configurations required Source code in src/pheval/config_parser.py 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 @serde @dataclass class InputDirConfig : \"\"\" Class for defining the fields within the input directory config. Args: tool (str): Name of the tool implementation (e.g. exomiser/phen2gene) tool_version (str): Version of the tool implementation variant_analysis (bool): Whether to extract prioritised variants from results. gene_analysis (bool): Whether to extract prioritised genes from results. disease_analysis (bool): Whether to extract prioritised diseases from results. tool_specific_configuration_options (Any): Tool specific configurations \"\"\" tool : str tool_version : str variant_analysis : bool gene_analysis : bool disease_analysis : bool tool_specific_configuration_options : Any","title":"InputDirConfig"},{"location":"api/pheval/config_parser/#src.pheval.config_parser.parse_input_dir_config","text":"Reads the config file. Source code in src/pheval/config_parser.py 35 36 37 38 39 40 def parse_input_dir_config ( input_dir : Path ) -> InputDirConfig : \"\"\"Reads the config file.\"\"\" with open ( Path ( input_dir ) . joinpath ( \"config.yaml\" ), \"r\" ) as config_file : config = yaml . safe_load ( config_file ) config_file . close () return from_yaml ( InputDirConfig , yaml . dump ( config ))","title":"parse_input_dir_config"},{"location":"api/pheval/constants/","text":"","title":"Constants"},{"location":"api/pheval/run_metadata/","text":"BasicOutputRunMetaData dataclass Class for defining variables for the run metadata. 
Args: tool (str): Name of the tool implementation tool_version (str): Version of the tool implementation config (Path): Path to the config file located in the input directory run_timestamp (int): Time taken for run to complete corpus (Path): Path to corpus used in pheval run tool_specific_configuration_options (Any): Special field that can be overwritten by tool implementations to contain any extra tool specific configurations used in the run Source code in src/pheval/run_metadata.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 @serde @dataclass class BasicOutputRunMetaData : \"\"\"Class for defining variables for the run metadata. Args: tool (str): Name of the tool implementation tool_version (str): Version of the tool implementation config (Path): Path to the config file located in the input directory run_timestamp (int): Time taken for run to complete corpus (Path): Path to corpus used in pheval run tool_specific_configuration_options (Any): Special field that can be overwritten by tool implementations to contain any extra tool specific configurations used in the run \"\"\" tool : str tool_version : str config : Path run_timestamp : int corpus : Path tool_specific_configuration_options : Any = None","title":"Run metadata"},{"location":"api/pheval/run_metadata/#src.pheval.run_metadata.BasicOutputRunMetaData","text":"Class for defining variables for the run metadata. 
Args: tool (str): Name of the tool implementation tool_version (str): Version of the tool implementation config (Path): Path to the config file located in the input directory run_timestamp (int): Time taken for run to complete corpus (Path): Path to corpus used in pheval run tool_specific_configuration_options (Any): Special field that can be overwritten by tool implementations to contain any extra tool specific configurations used in the run Source code in src/pheval/run_metadata.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 @serde @dataclass class BasicOutputRunMetaData : \"\"\"Class for defining variables for the run metadata. Args: tool (str): Name of the tool implementation tool_version (str): Version of the tool implementation config (Path): Path to the config file located in the input directory run_timestamp (int): Time taken for run to complete corpus (Path): Path to corpus used in pheval run tool_specific_configuration_options (Any): Special field that can be overwritten by tool implementations to contain any extra tool specific configurations used in the run \"\"\" tool : str tool_version : str config : Path run_timestamp : int corpus : Path tool_specific_configuration_options : Any = None","title":"BasicOutputRunMetaData"},{"location":"api/pheval/analyse/analysis/","text":"benchmark_directory ( results_dir_and_input , score_order , output_prefix , threshold , gene_analysis , variant_analysis , disease_analysis , plot_type ) Benchmark prioritisation performance for a single run. Parameters: Name Type Description Default results_dir_and_input TrackInputOutputDirectories Input and output directories for tracking results. required score_order str The order in which scores are arranged, this can be either ascending or descending. required output_prefix str Prefix for the benchmark output file names. required threshold float The threshold for benchmark evaluation. 
required gene_analysis bool Boolean flag indicating whether to benchmark gene results. required variant_analysis bool Boolean flag indicating whether to benchmark variant results. required disease_analysis bool Boolean flag indicating whether to benchmark disease results. required plot_type str Type of plot for benchmark visualisation. required Source code in src/pheval/analyse/analysis.py 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 def benchmark_directory ( results_dir_and_input : TrackInputOutputDirectories , score_order : str , output_prefix : str , threshold : float , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , ) -> None : \"\"\" Benchmark prioritisation performance for a single run. Args: results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results. score_order (str): The order in which scores are arranged, this can be either ascending or descending. output_prefix (str): Prefix for the benchmark output file names. threshold (float): The threshold for benchmark evaluation. gene_analysis (bool): Boolean flag indicating whether to benchmark gene results. variant_analysis (bool): Boolean flag indicating whether to benchmark variant results. disease_analysis (bool): Boolean flag indicating whether to benchmark disease results. plot_type (str): Type of plot for benchmark visualisation. 
\"\"\" if gene_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = GeneBenchmarkRunOutputGenerator (), ) if variant_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = VariantBenchmarkRunOutputGenerator (), ) if disease_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = DiseaseBenchmarkRunOutputGenerator (), ) benchmark_run_comparisons ( results_directories , score_order , output_prefix , threshold , gene_analysis , variant_analysis , disease_analysis , plot_type ) Benchmark prioritisation performance for several runs. Parameters: Name Type Description Default results_directories List [ TrackInputOutputDirectories ] Input and output directories for tracking results. required score_order str The order in which scores are arranged, this can be either ascending or descending. required output_prefix str Prefix for the benchmark output file names. required threshold float The threshold for benchmark evaluation. required gene_analysis bool Boolean flag indicating whether to benchmark gene results. required variant_analysis bool Boolean flag indicating whether to benchmark variant results. required disease_analysis bool Boolean flag indicating whether to benchmark disease results. required plot_type str Type of plot for benchmark visualisation. 
required Source code in src/pheval/analyse/analysis.py 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 def benchmark_run_comparisons ( results_directories : List [ TrackInputOutputDirectories ], score_order : str , output_prefix : str , threshold : float , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , ) -> None : \"\"\" Benchmark prioritisation performance for several runs. Args: results_directories (List[TrackInputOutputDirectories]): Input and output directories for tracking results. score_order (str): The order in which scores are arranged, this can be either ascending or descending. output_prefix (str): Prefix for the benchmark output file names. threshold (float): The threshold for benchmark evaluation. gene_analysis (bool): Boolean flag indicating whether to benchmark gene results. variant_analysis (bool): Boolean flag indicating whether to benchmark variant results. disease_analysis (bool): Boolean flag indicating whether to benchmark disease results. plot_type (str): Type of plot for benchmark visualisation. 
\"\"\" if gene_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = GeneBenchmarkRunOutputGenerator (), ) if variant_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = VariantBenchmarkRunOutputGenerator (), ) if disease_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = DiseaseBenchmarkRunOutputGenerator (), )","title":"Analysis"},{"location":"api/pheval/analyse/analysis/#src.pheval.analyse.analysis.benchmark_directory","text":"Benchmark prioritisation performance for a single run. Parameters: Name Type Description Default results_dir_and_input TrackInputOutputDirectories Input and output directories for tracking results. required score_order str The order in which scores are arranged, this can be either ascending or descending. required output_prefix str Prefix for the benchmark output file names. required threshold float The threshold for benchmark evaluation. required gene_analysis bool Boolean flag indicating whether to benchmark gene results. required variant_analysis bool Boolean flag indicating whether to benchmark variant results. required disease_analysis bool Boolean flag indicating whether to benchmark disease results. required plot_type str Type of plot for benchmark visualisation. 
required Source code in src/pheval/analyse/analysis.py 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 def benchmark_directory ( results_dir_and_input : TrackInputOutputDirectories , score_order : str , output_prefix : str , threshold : float , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , ) -> None : \"\"\" Benchmark prioritisation performance for a single run. Args: results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results. score_order (str): The order in which scores are arranged, this can be either ascending or descending. output_prefix (str): Prefix for the benchmark output file names. threshold (float): The threshold for benchmark evaluation. gene_analysis (bool): Boolean flag indicating whether to benchmark gene results. variant_analysis (bool): Boolean flag indicating whether to benchmark variant results. disease_analysis (bool): Boolean flag indicating whether to benchmark disease results. plot_type (str): Type of plot for benchmark visualisation. 
\"\"\" if gene_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = GeneBenchmarkRunOutputGenerator (), ) if variant_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = VariantBenchmarkRunOutputGenerator (), ) if disease_analysis : _run_benchmark ( results_dir_and_input = results_dir_and_input , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = DiseaseBenchmarkRunOutputGenerator (), )","title":"benchmark_directory"},{"location":"api/pheval/analyse/analysis/#src.pheval.analyse.analysis.benchmark_run_comparisons","text":"Benchmark prioritisation performance for several runs. Parameters: Name Type Description Default results_directories List [ TrackInputOutputDirectories ] Input and output directories for tracking results. required score_order str The order in which scores are arranged, this can be either ascending or descending. required output_prefix str Prefix for the benchmark output file names. required threshold float The threshold for benchmark evaluation. required gene_analysis bool Boolean flag indicating whether to benchmark gene results. required variant_analysis bool Boolean flag indicating whether to benchmark variant results. required disease_analysis bool Boolean flag indicating whether to benchmark disease results. required plot_type str Type of plot for benchmark visualisation. 
required Source code in src/pheval/analyse/analysis.py 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 def benchmark_run_comparisons ( results_directories : List [ TrackInputOutputDirectories ], score_order : str , output_prefix : str , threshold : float , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , ) -> None : \"\"\" Benchmark prioritisation performance for several runs. Args: results_directories (List[TrackInputOutputDirectories]): Input and output directories for tracking results. score_order (str): The order in which scores are arranged, this can be either ascending or descending. output_prefix (str): Prefix for the benchmark output file names. threshold (float): The threshold for benchmark evaluation. gene_analysis (bool): Boolean flag indicating whether to benchmark gene results. variant_analysis (bool): Boolean flag indicating whether to benchmark variant results. disease_analysis (bool): Boolean flag indicating whether to benchmark disease results. plot_type (str): Type of plot for benchmark visualisation. 
\"\"\" if gene_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = GeneBenchmarkRunOutputGenerator (), ) if variant_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = VariantBenchmarkRunOutputGenerator (), ) if disease_analysis : _run_benchmark_comparison ( results_directories = results_directories , score_order = score_order , output_prefix = output_prefix , threshold = threshold , plot_type = plot_type , benchmark_generator = DiseaseBenchmarkRunOutputGenerator (), )","title":"benchmark_run_comparisons"},{"location":"api/pheval/analyse/benchmark_generator/","text":"BenchmarkRunOutputGenerator dataclass Base class for recording data required for generating benchmarking outputs. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the prioritisation type output file. y_label str Label for the y-axis in benchmarking outputs. generate_benchmark_run_results Callable Callable to generate benchmark run results. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the rank comparison file. Source code in src/pheval/analyse/benchmark_generator.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 @dataclass class BenchmarkRunOutputGenerator : \"\"\"Base class for recording data required for generating benchmarking outputs. Attributes: prioritisation_type_file_prefix (str): Prefix for the prioritisation type output file. y_label (str): Label for the y-axis in benchmarking outputs. generate_benchmark_run_results (Callable): Callable to generate benchmark run results. 
Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the rank comparison file. \"\"\" prioritisation_type_file_prefix : str y_label : str generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] stats_comparison_file_suffix : str DiseaseBenchmarkRunOutputGenerator dataclass Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing disease prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for disease prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the disease prioritisation type file. Defaults to DISEASE_PLOT_FILE_PREFIX. y_label str Label for the y-axis in disease prioritisation benchmarking outputs. Defaults to DISEASE_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate disease prioritisation benchmark run results. Defaults to benchmark_disease_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the disease rank comparison file. Defaults to \"-disease_summary.tsv\". Source code in src/pheval/analyse/benchmark_generator.py 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 @dataclass class DiseaseBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing disease prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for disease prioritisation benchmarking. 
Attributes: prioritisation_type_file_prefix (str): Prefix for the disease prioritisation type file. Defaults to DISEASE_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in disease prioritisation benchmarking outputs. Defaults to DISEASE_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate disease prioritisation benchmark run results. Defaults to benchmark_disease_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the disease rank comparison file. Defaults to \"-disease_summary.tsv\". \"\"\" prioritisation_type_file_prefix : str = DISEASE_PLOT_FILE_PREFIX y_label : str = DISEASE_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_disease_prioritisation stats_comparison_file_suffix : str = \"-disease_summary.tsv\" GeneBenchmarkRunOutputGenerator dataclass Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing gene prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for gene prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the gene prioritisation type file. Defaults to GENE_PLOT_FILE_PREFIX. y_label str Label for the y-axis in gene prioritisation benchmarking outputs. Defaults to GENE_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate gene prioritisation benchmark run results. Defaults to benchmark_gene_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the gene rank comparison file. Defaults to \"-gene_summary.tsv\". 
Source code in src/pheval/analyse/benchmark_generator.py 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 @dataclass class GeneBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing gene prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for gene prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the gene prioritisation type file. Defaults to GENE_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in gene prioritisation benchmarking outputs. Defaults to GENE_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate gene prioritisation benchmark run results. Defaults to benchmark_gene_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the gene rank comparison file. Defaults to \"-gene_summary.tsv\". \"\"\" prioritisation_type_file_prefix : str = GENE_PLOT_FILE_PREFIX y_label : str = GENE_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_gene_prioritisation stats_comparison_file_suffix : str = \"-gene_summary.tsv\" VariantBenchmarkRunOutputGenerator dataclass Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing variant prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for variant prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the variant prioritisation type file. Defaults to VARIANT_PLOT_FILE_PREFIX. y_label str Label for the y-axis in variant prioritisation benchmarking outputs. 
Defaults to VARIANT_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate variant prioritisation benchmark run results. Defaults to benchmark_variant_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the variant rank comparison file. Defaults to \"-variant_summary.tsv\". Source code in src/pheval/analyse/benchmark_generator.py 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 @dataclass class VariantBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing variant prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for variant prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the variant prioritisation type file. Defaults to VARIANT_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in variant prioritisation benchmarking outputs. Defaults to VARIANT_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate variant prioritisation benchmark run results. Defaults to benchmark_variant_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the variant rank comparison file. Defaults to \"-variant_summary.tsv\". 
\"\"\" prioritisation_type_file_prefix : str = VARIANT_PLOT_FILE_PREFIX y_label : str = VARIANT_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_variant_prioritisation stats_comparison_file_suffix : str = \"-variant_summary.tsv\"","title":"Benchmark generator"},{"location":"api/pheval/analyse/benchmark_generator/#src.pheval.analyse.benchmark_generator.BenchmarkRunOutputGenerator","text":"Base class for recording data required for generating benchmarking outputs. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the prioritisation type output file. y_label str Label for the y-axis in benchmarking outputs. generate_benchmark_run_results Callable Callable to generate benchmark run results. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the rank comparison file. Source code in src/pheval/analyse/benchmark_generator.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 @dataclass class BenchmarkRunOutputGenerator : \"\"\"Base class for recording data required for generating benchmarking outputs. Attributes: prioritisation_type_file_prefix (str): Prefix for the prioritisation type output file. y_label (str): Label for the y-axis in benchmarking outputs. generate_benchmark_run_results (Callable): Callable to generate benchmark run results. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the rank comparison file. 
\"\"\" prioritisation_type_file_prefix : str y_label : str generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] stats_comparison_file_suffix : str","title":"BenchmarkRunOutputGenerator"},{"location":"api/pheval/analyse/benchmark_generator/#src.pheval.analyse.benchmark_generator.DiseaseBenchmarkRunOutputGenerator","text":"Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing disease prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for disease prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the disease prioritisation type file. Defaults to DISEASE_PLOT_FILE_PREFIX. y_label str Label for the y-axis in disease prioritisation benchmarking outputs. Defaults to DISEASE_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate disease prioritisation benchmark run results. Defaults to benchmark_disease_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the disease rank comparison file. Defaults to \"-disease_summary.tsv\". Source code in src/pheval/analyse/benchmark_generator.py 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 @dataclass class DiseaseBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing disease prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for disease prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the disease prioritisation type file. Defaults to DISEASE_PLOT_FILE_PREFIX. 
y_label (str): Label for the y-axis in disease prioritisation benchmarking outputs. Defaults to DISEASE_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate disease prioritisation benchmark run results. Defaults to benchmark_disease_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the disease rank comparison file. Defaults to \"-disease_summary.tsv\". \"\"\" prioritisation_type_file_prefix : str = DISEASE_PLOT_FILE_PREFIX y_label : str = DISEASE_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_disease_prioritisation stats_comparison_file_suffix : str = \"-disease_summary.tsv\"","title":"DiseaseBenchmarkRunOutputGenerator"},{"location":"api/pheval/analyse/benchmark_generator/#src.pheval.analyse.benchmark_generator.GeneBenchmarkRunOutputGenerator","text":"Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing gene prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for gene prioritisation benchmarking. Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the gene prioritisation type file. Defaults to GENE_PLOT_FILE_PREFIX. y_label str Label for the y-axis in gene prioritisation benchmarking outputs. Defaults to GENE_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate gene prioritisation benchmark run results. Defaults to benchmark_gene_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the gene rank comparison file. Defaults to \"-gene_summary.tsv\". 
Source code in src/pheval/analyse/benchmark_generator.py 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 @dataclass class GeneBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing gene prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for gene prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the gene prioritisation type file. Defaults to GENE_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in gene prioritisation benchmarking outputs. Defaults to GENE_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate gene prioritisation benchmark run results. Defaults to benchmark_gene_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the gene rank comparison file. Defaults to \"-gene_summary.tsv\". \"\"\" prioritisation_type_file_prefix : str = GENE_PLOT_FILE_PREFIX y_label : str = GENE_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_gene_prioritisation stats_comparison_file_suffix : str = \"-gene_summary.tsv\"","title":"GeneBenchmarkRunOutputGenerator"},{"location":"api/pheval/analyse/benchmark_generator/#src.pheval.analyse.benchmark_generator.VariantBenchmarkRunOutputGenerator","text":"Bases: BenchmarkRunOutputGenerator Subclass of BenchmarkRunOutputGenerator specialised for producing variant prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for variant prioritisation benchmarking. 
Attributes: Name Type Description prioritisation_type_file_prefix str Prefix for the variant prioritisation type file. Defaults to VARIANT_PLOT_FILE_PREFIX. y_label str Label for the y-axis in variant prioritisation benchmarking outputs. Defaults to VARIANT_PLOT_Y_LABEL. generate_benchmark_run_results Callable Callable to generate variant prioritisation benchmark run results. Defaults to benchmark_variant_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix str Suffix for the variant rank comparison file. Defaults to \"-variant_summary.tsv\". Source code in src/pheval/analyse/benchmark_generator.py 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 @dataclass class VariantBenchmarkRunOutputGenerator ( BenchmarkRunOutputGenerator ): \"\"\" Subclass of BenchmarkRunOutputGenerator specialised for producing variant prioritisation benchmarking outputs. This subclass inherits from BenchmarkRunOutputGenerator and specialises its attributes specifically for variant prioritisation benchmarking. Attributes: prioritisation_type_file_prefix (str): Prefix for the variant prioritisation type file. Defaults to VARIANT_PLOT_FILE_PREFIX. y_label (str): Label for the y-axis in variant prioritisation benchmarking outputs. Defaults to VARIANT_PLOT_Y_LABEL. generate_benchmark_run_results (Callable): Callable to generate variant prioritisation benchmark run results. Defaults to benchmark_variant_prioritisation. Takes parameters: input and results directory, score order, threshold, rank comparison, and returns BenchmarkRunResults. stats_comparison_file_suffix (str): Suffix for the variant rank comparison file. Defaults to \"-variant_summary.tsv\". 
\"\"\" prioritisation_type_file_prefix : str = VARIANT_PLOT_FILE_PREFIX y_label : str = VARIANT_PLOT_Y_LABEL generate_benchmark_run_results : Callable [ [ TrackInputOutputDirectories , str , float , defaultdict ], BenchmarkRunResults ] = benchmark_variant_prioritisation stats_comparison_file_suffix : str = \"-variant_summary.tsv\"","title":"VariantBenchmarkRunOutputGenerator"},{"location":"api/pheval/analyse/benchmarking_data/","text":"BenchmarkRunResults dataclass Benchmarking results for a run. Attributes: Name Type Description ranks dict Dictionary containing recorded ranks for samples. rank_stats RankStats Statistics related to benchmark. results_dir Path Path to the result directory. Defaults to None. benchmark_name str Name of the benchmark run. Defaults to None. Source code in src/pheval/analyse/benchmarking_data.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 @dataclass class BenchmarkRunResults : \"\"\" Benchmarking results for a run. Attributes: ranks (dict): Dictionary containing recorded ranks for samples. rank_stats (RankStats): Statistics related to benchmark. results_dir (Path, optional): Path to the result directory. Defaults to None. benchmark_name (str, optional): Name of the benchmark run. Defaults to None. \"\"\" ranks : dict rank_stats : RankStats binary_classification_stats : BinaryClassificationStats results_dir : Path = None benchmark_name : str = None","title":"Benchmarking data"},{"location":"api/pheval/analyse/benchmarking_data/#src.pheval.analyse.benchmarking_data.BenchmarkRunResults","text":"Benchmarking results for a run. Attributes: Name Type Description ranks dict Dictionary containing recorded ranks for samples. rank_stats RankStats Statistics related to benchmark. results_dir Path Path to the result directory. Defaults to None. benchmark_name str Name of the benchmark run. Defaults to None. 
Source code in src/pheval/analyse/benchmarking_data.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 @dataclass class BenchmarkRunResults : \"\"\" Benchmarking results for a run. Attributes: ranks (dict): Dictionary containing recorded ranks for samples. rank_stats (RankStats): Statistics related to benchmark. results_dir (Path, optional): Path to the result directory. Defaults to None. benchmark_name (str, optional): Name of the benchmark run. Defaults to None. \"\"\" ranks : dict rank_stats : RankStats binary_classification_stats : BinaryClassificationStats results_dir : Path = None benchmark_name : str = None","title":"BenchmarkRunResults"},{"location":"api/pheval/analyse/binary_classification_stats/","text":"BinaryClassificationStats dataclass A data class representing counts of different categories in binary classification. Attributes: Name Type Description true_positives int The count of true positive instances - i.e., the number of known entities ranked 1 in the results. true_negatives int The count of true negative instances - i.e., the number of non-relevant entities ranked at a position other than 1 in the results. false_positives int The count of false positive instances - i.e., the number of non-relevant entities ranked at position 1 in the results. false_negatives int The count of false negative instances - i.e., the number of known entities ranked at a position other than 1 in the results. 
Source code in src/pheval/analyse/binary_classification_stats.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 @dataclass class BinaryClassificationStats : \"\"\" A data class representing counts of different categories in binary classification. Attributes: true_positives (int): The count of true positive instances - i.e., the number of known entities ranked 1 in the results. true_negatives (int): The count of true negative instances - i.e., the number of non-relevant entities ranked at a position other than 1 in the results. false_positives (int): The count of false positive instances - i.e., the number of non-relevant entities ranked at position 1 in the results. false_negatives (int): The count of false negative instances - i.e., the number of known entities ranked at a position other than 1 in the results. 
\"\"\" true_positives : int = 0 true_negatives : int = 0 false_positives : int = 0 false_negatives : int = 0 labels : List = field ( default_factory = list ) scores : List = field ( default_factory = list ) @staticmethod def remove_relevant_ranks ( pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> List [ int ]: \"\"\" Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: List[int]: A list of the ranks with the relevant entity ranks removed. \"\"\" all_result_ranks = [ pheval_result . rank for pheval_result in pheval_results ] for rank in relevant_ranks : if rank in all_result_ranks : all_result_ranks . remove ( rank ) continue return all_result_ranks def add_classification_for_known_entities ( self , relevant_ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for known entities based on their ranking. Args: relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" for rank in relevant_ranks : if rank == 1 : self . true_positives += 1 elif rank != 1 : self . false_negatives += 1 def add_classification_for_other_entities ( self , ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for other entities based on their ranking. Args: ranks (List[int]): A list of the ranks for all other entities. \"\"\" for rank in ranks : if rank == 1 : self . false_positives += 1 elif rank != 1 : self . 
true_negatives += 1 def add_labels_and_scores ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ): \"\"\" Adds scores and labels from the PhEval results. Args: pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): List of all PhEval results relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" relevant_ranks_copy = relevant_ranks . copy () for result in pheval_results : self . scores . append ( result . score ) label = 1 if result . rank in relevant_ranks_copy else 0 self . labels . append ( label ) relevant_ranks_copy . remove ( result . rank ) if label == 1 else None def add_classification ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> None : \"\"\" Update binary classification metrics for known and unknown entities based on their ranks. Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" self . add_classification_for_known_entities ( relevant_ranks ) self . add_classification_for_other_entities ( self . remove_relevant_ranks ( pheval_results , relevant_ranks ) ) self . add_labels_and_scores ( pheval_results , relevant_ranks ) def sensitivity ( self ) -> float : \"\"\" Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: float: The sensitivity of the model, calculated as true positives divided by the sum of true positives and false negatives. Returns 0 if both true positives and false negatives are zero. \"\"\" return ( self . 
true_positives / ( self . true_positives + self . false_negatives ) if ( self . true_positives + self . false_negatives ) > 0 else 0.0 ) def specificity ( self ) -> float : \"\"\" Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: float: The specificity of the model, calculated as true negatives divided by the sum of true negatives and false positives. Returns 0.0 if both true negatives and false positives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_positives ) if ( self . true_negatives + self . false_positives ) > 0 else 0.0 ) def precision ( self ) -> float : \"\"\" Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: float: The precision of the model, calculated as true positives divided by the sum of true positives and false positives. Returns 0.0 if both true positives and false positives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_positives ) if ( self . true_positives + self . false_positives ) > 0 else 0.0 ) def negative_predictive_value ( self ) -> float : \"\"\" Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: float: The Negative Predictive Value of the model, calculated as true negatives divided by the sum of true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_negatives ) if ( self . true_negatives + self . false_negatives ) > 0 else 0.0 ) def false_positive_rate ( self ) -> float : \"\"\" Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. 
Returns: float: The False Positive Rate of the model, calculated as false positives divided by the sum of false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_negatives ) if ( self . false_positives + self . true_negatives ) > 0 else 0.0 ) def false_discovery_rate ( self ) -> float : \"\"\" Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Discovery Rate of the model, calculated as false positives divided by the sum of false positives and true positives. Returns 0.0 if both false positives and true positives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_positives ) if ( self . false_positives + self . true_positives ) > 0 else 0.0 ) def false_negative_rate ( self ) -> float : \"\"\" Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: float: The False Negative Rate of the model, calculated as false negatives divided by the sum of false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. \"\"\" return ( self . false_negatives / ( self . false_negatives + self . true_positives ) if ( self . false_negatives + self . true_positives ) > 0 else 0.0 ) def accuracy ( self ) -> float : \"\"\" Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: float: The Accuracy of the model, calculated as the sum of true positives and true negatives divided by the sum of true positives, false positives, true negatives, and false negatives. Returns 0.0 if the total sum of counts is zero. \"\"\" return ( ( self . true_positives + self . true_negatives ) / ( self . true_positives + self . false_positives + self . 
true_negatives + self . false_negatives ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) def f1_score ( self ) -> float : \"\"\" Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: float: The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). Returns 0.0 if the denominator is zero. \"\"\" return ( ( 2 * self . true_positives ) / (( 2 * self . true_positives ) + self . false_positives + self . false_negatives ) if ( self . true_positives + self . false_positives + self . false_negatives ) > 0 else 0.0 ) def matthews_correlation_coefficient ( self ) -> float : \"\"\" Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: float: The Matthews Correlation Coefficient of the model, calculated as ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). Returns 0.0 if the denominator is zero. \"\"\" return ( ( ( self . true_positives * self . true_negatives ) - ( self . false_positives * self . false_negatives ) ) / ( sqrt ( ( self . true_positives + self . false_positives ) * ( self . true_positives + self . false_negatives ) * ( self . true_negatives + self . false_positives ) * ( self . true_negatives + self . false_negatives ) ) ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) accuracy () Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: Name Type Description float float The Accuracy of the model, calculated as the sum of true positives and true negatives divided by float the sum of true positives, false positives, true negatives, and false negatives. float Returns 0.0 if the total sum of counts is zero. 
Source code in src/pheval/analyse/binary_classification_stats.py 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 def accuracy ( self ) -> float : \"\"\" Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: float: The Accuracy of the model, calculated as the sum of true positives and true negatives divided by the sum of true positives, false positives, true negatives, and false negatives. Returns 0.0 if the total sum of counts is zero. \"\"\" return ( ( self . true_positives + self . true_negatives ) / ( self . true_positives + self . false_positives + self . true_negatives + self . false_negatives ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) add_classification ( pheval_results , relevant_ranks ) Update binary classification metrics for known and unknown entities based on their ranks. Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Source code in src/pheval/analyse/binary_classification_stats.py 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 def add_classification ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> None : \"\"\" Update binary classification metrics for known and unknown entities based on their ranks. Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" self . 
add_classification_for_known_entities ( relevant_ranks ) self . add_classification_for_other_entities ( self . remove_relevant_ranks ( pheval_results , relevant_ranks ) ) self . add_labels_and_scores ( pheval_results , relevant_ranks ) add_classification_for_known_entities ( relevant_ranks ) Update binary classification metrics for known entities based on their ranking. Parameters: Name Type Description Default relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Source code in src/pheval/analyse/binary_classification_stats.py 63 64 65 66 67 68 69 70 71 72 73 74 def add_classification_for_known_entities ( self , relevant_ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for known entities based on their ranking. Args: relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" for rank in relevant_ranks : if rank == 1 : self . true_positives += 1 elif rank != 1 : self . false_negatives += 1 add_classification_for_other_entities ( ranks ) Update binary classification metrics for other entities based on their ranking. Parameters: Name Type Description Default ranks List [ int ] A list of the ranks for all other entities. required Source code in src/pheval/analyse/binary_classification_stats.py 76 77 78 79 80 81 82 83 84 85 86 87 def add_classification_for_other_entities ( self , ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for other entities based on their ranking. Args: ranks (List[int]): A list of the ranks for all other entities. \"\"\" for rank in ranks : if rank == 1 : self . false_positives += 1 elif rank != 1 : self . true_negatives += 1 add_labels_and_scores ( pheval_results , relevant_ranks ) Adds scores and labels from the PhEval results. Parameters: Name Type Description Default relevant_ranks List [ int ] A list of the ranks associated with the known entities. 
required Source code in src/pheval/analyse/binary_classification_stats.py 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 def add_labels_and_scores ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ): \"\"\" Adds scores and labels from the PhEval results. Args: pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): List of all PhEval results relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" relevant_ranks_copy = relevant_ranks . copy () for result in pheval_results : self . scores . append ( result . score ) label = 1 if result . rank in relevant_ranks_copy else 0 self . labels . append ( label ) relevant_ranks_copy . remove ( result . rank ) if label == 1 else None f1_score () Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: Name Type Description float float The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). float Returns 0.0 if the denominator is zero. Source code in src/pheval/analyse/binary_classification_stats.py 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 def f1_score ( self ) -> float : \"\"\" Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: float: The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). Returns 0.0 if the denominator is zero. \"\"\" return ( ( 2 * self . true_positives ) / (( 2 * self . true_positives ) + self . false_positives + self . false_negatives ) if ( self . true_positives + self . false_positives + self . false_negatives ) > 0 else 0.0 ) false_discovery_rate () Calculate False Discovery Rate (FDR). 
FDR measures the proportion of instances predicted as positive that are actually negative. Returns: Name Type Description float float The False Discovery Rate of the model, calculated as false positives divided by the sum of float false positives and true positives. Returns 0.0 if both false positives and true positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 def false_discovery_rate ( self ) -> float : \"\"\" Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Discovery Rate of the model, calculated as false positives divided by the sum of false positives and true positives. Returns 0.0 if both false positives and true positives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_positives ) if ( self . false_positives + self . true_positives ) > 0 else 0.0 ) false_negative_rate () Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: Name Type Description float float The False Negative Rate of the model, calculated as false negatives divided by the sum of float false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 def false_negative_rate ( self ) -> float : \"\"\" Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: float: The False Negative Rate of the model, calculated as false negatives divided by the sum of false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. \"\"\" return ( self . false_negatives / ( self . false_negatives + self . 
true_positives ) if ( self . false_negatives + self . true_positives ) > 0 else 0.0 ) false_positive_rate () Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. Returns: Name Type Description float float The False Positive Rate of the model, calculated as false positives divided by the sum of float false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 def false_positive_rate ( self ) -> float : \"\"\" Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Positive Rate of the model, calculated as false positives divided by the sum of false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_negatives ) if ( self . false_positives + self . true_negatives ) > 0 else 0.0 ) matthews_correlation_coefficient () Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: Name Type Description float float The Matthews Correlation Coefficient of the model, calculated as float ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). float Returns 0.0 if the denominator is zero. Source code in src/pheval/analyse/binary_classification_stats.py 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 def matthews_correlation_coefficient ( self ) -> float : \"\"\" Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. 
Returns: float: The Matthews Correlation Coefficient of the model, calculated as ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). Returns 0.0 if the denominator is zero. \"\"\" return ( ( ( self . true_positives * self . true_negatives ) - ( self . false_positives * self . false_negatives ) ) / ( sqrt ( ( self . true_positives + self . false_positives ) * ( self . true_positives + self . false_negatives ) * ( self . true_negatives + self . false_positives ) * ( self . true_negatives + self . false_negatives ) ) ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) negative_predictive_value () Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: Name Type Description float float The Negative Predictive Value of the model, calculated as true negatives divided by the sum of float true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 def negative_predictive_value ( self ) -> float : \"\"\" Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: float: The Negative Predictive Value of the model, calculated as true negatives divided by the sum of true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_negatives ) if ( self . true_negatives + self . false_negatives ) > 0 else 0.0 ) precision () Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. 
Returns: Name Type Description float float The precision of the model, calculated as true positives divided by the sum of true positives float and false positives. Returns 0.0 if both true positives and false positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 def precision ( self ) -> float : \"\"\" Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: float: The precision of the model, calculated as true positives divided by the sum of true positives and false positives. Returns 0.0 if both true positives and false positives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_positives ) if ( self . true_positives + self . false_positives ) > 0 else 0.0 ) remove_relevant_ranks ( pheval_results , relevant_ranks ) staticmethod Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: Type Description List [ int ] List[int]: A list of the ranks with the relevant entity ranks removed. Source code in src/pheval/analyse/binary_classification_stats.py 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 @staticmethod def remove_relevant_ranks ( pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> List [ int ]: \"\"\" Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. 
relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: List[int]: A list of the ranks with the relevant entity ranks removed. \"\"\" all_result_ranks = [ pheval_result . rank for pheval_result in pheval_results ] for rank in relevant_ranks : if rank in all_result_ranks : all_result_ranks . remove ( rank ) continue return all_result_ranks sensitivity () Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: Name Type Description float float The sensitivity of the model, calculated as true positives divided by the sum of true positives float and false negatives. Returns 0 if both true positives and false negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 def sensitivity ( self ) -> float : \"\"\" Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: float: The sensitivity of the model, calculated as true positives divided by the sum of true positives and false negatives. Returns 0 if both true positives and false negatives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_negatives ) if ( self . true_positives + self . false_negatives ) > 0 else 0.0 ) specificity () Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: Name Type Description float float The specificity of the model, calculated as true negatives divided by the sum of true negatives float and false positives. Returns 0.0 if both true negatives and false positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 def specificity ( self ) -> float : \"\"\" Calculate specificity. 
Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: float: The specificity of the model, calculated as true negatives divided by the sum of true negatives and false positives. Returns 0.0 if both true negatives and false positives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_positives ) if ( self . true_negatives + self . false_positives ) > 0 else 0.0 )","title":"Binary classification stats"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats","text":"A data class representing counts of different categories in binary classification. Attributes: Name Type Description true_positives int The count of true positive instances - i.e., the number of known entities ranked 1 in the results. true_negatives int The count of true negative instances - i.e., the number of non-relevant entities ranked at a position other than 1 in the results. false_positives int The count of false positive instances - i.e., the number of non-relevant entities ranked at position 1 in the results. false_negatives int The count of false negative instances - i.e., the number of known entities ranked at a position other than 1 in the results. 
Source code in src/pheval/analyse/binary_classification_stats.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 @dataclass class BinaryClassificationStats : \"\"\" A data class representing counts of different categories in binary classification. Attributes: true_positives (int): The count of true positive instances - i.e., the number of known entities ranked 1 in the results. true_negatives (int): The count of true negative instances - i.e., the number of non-relevant entities ranked at a position other than 1 in the results. false_positives (int): The count of false positive instances - i.e., the number of non-relevant entities ranked at position 1 in the results. false_negatives (int): The count of false negative instances - i.e., the number of known entities ranked at a position other than 1 in the results. 
\"\"\" true_positives : int = 0 true_negatives : int = 0 false_positives : int = 0 false_negatives : int = 0 labels : List = field ( default_factory = list ) scores : List = field ( default_factory = list ) @staticmethod def remove_relevant_ranks ( pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> List [ int ]: \"\"\" Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: List[int]: A list of the ranks with the relevant entity ranks removed. \"\"\" all_result_ranks = [ pheval_result . rank for pheval_result in pheval_results ] for rank in relevant_ranks : if rank in all_result_ranks : all_result_ranks . remove ( rank ) continue return all_result_ranks def add_classification_for_known_entities ( self , relevant_ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for known entities based on their ranking. Args: relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" for rank in relevant_ranks : if rank == 1 : self . true_positives += 1 elif rank != 1 : self . false_negatives += 1 def add_classification_for_other_entities ( self , ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for other entities based on their ranking. Args: ranks (List[int]): A list of the ranks for all other entities. \"\"\" for rank in ranks : if rank == 1 : self . false_positives += 1 elif rank != 1 : self . 
true_negatives += 1 def add_labels_and_scores ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ): \"\"\" Adds scores and labels from the PhEval results. Args: pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): List of all PhEval results relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" relevant_ranks_copy = relevant_ranks . copy () for result in pheval_results : self . scores . append ( result . score ) label = 1 if result . rank in relevant_ranks_copy else 0 self . labels . append ( label ) relevant_ranks_copy . remove ( result . rank ) if label == 1 else None def add_classification ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> None : \"\"\" Update binary classification metrics for known and unknown entities based on their ranks. Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" self . add_classification_for_known_entities ( relevant_ranks ) self . add_classification_for_other_entities ( self . remove_relevant_ranks ( pheval_results , relevant_ranks ) ) self . add_labels_and_scores ( pheval_results , relevant_ranks ) def sensitivity ( self ) -> float : \"\"\" Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: float: The sensitivity of the model, calculated as true positives divided by the sum of true positives and false negatives. Returns 0 if both true positives and false negatives are zero. \"\"\" return ( self . 
true_positives / ( self . true_positives + self . false_negatives ) if ( self . true_positives + self . false_negatives ) > 0 else 0.0 ) def specificity ( self ) -> float : \"\"\" Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: float: The specificity of the model, calculated as true negatives divided by the sum of true negatives and false positives. Returns 0.0 if both true negatives and false positives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_positives ) if ( self . true_negatives + self . false_positives ) > 0 else 0.0 ) def precision ( self ) -> float : \"\"\" Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: float: The precision of the model, calculated as true positives divided by the sum of true positives and false positives. Returns 0.0 if both true positives and false positives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_positives ) if ( self . true_positives + self . false_positives ) > 0 else 0.0 ) def negative_predictive_value ( self ) -> float : \"\"\" Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: float: The Negative Predictive Value of the model, calculated as true negatives divided by the sum of true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_negatives ) if ( self . true_negatives + self . false_negatives ) > 0 else 0.0 ) def false_positive_rate ( self ) -> float : \"\"\" Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. 
Returns: float: The False Positive Rate of the model, calculated as false positives divided by the sum of false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_negatives ) if ( self . false_positives + self . true_negatives ) > 0 else 0.0 ) def false_discovery_rate ( self ) -> float : \"\"\" Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Discovery Rate of the model, calculated as false positives divided by the sum of false positives and true positives. Returns 0.0 if both false positives and true positives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_positives ) if ( self . false_positives + self . true_positives ) > 0 else 0.0 ) def false_negative_rate ( self ) -> float : \"\"\" Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: float: The False Negative Rate of the model, calculated as false negatives divided by the sum of false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. \"\"\" return ( self . false_negatives / ( self . false_negatives + self . true_positives ) if ( self . false_negatives + self . true_positives ) > 0 else 0.0 ) def accuracy ( self ) -> float : \"\"\" Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: float: The Accuracy of the model, calculated as the sum of true positives and true negatives divided by the sum of true positives, false positives, true negatives, and false negatives. Returns 0.0 if the total sum of counts is zero. \"\"\" return ( ( self . true_positives + self . true_negatives ) / ( self . true_positives + self . false_positives + self . 
true_negatives + self . false_negatives ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 ) def f1_score ( self ) -> float : \"\"\" Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: float: The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). Returns 0.0 if the denominator is zero. \"\"\" return ( ( 2 * self . true_positives ) / (( 2 * self . true_positives ) + self . false_positives + self . false_negatives ) if ( self . true_positives + self . false_positives + self . false_negatives ) > 0 else 0.0 ) def matthews_correlation_coefficient ( self ) -> float : \"\"\" Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: float: The Matthews Correlation Coefficient of the model, calculated as ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). Returns 0.0 if the denominator is zero. \"\"\" return ( ( ( self . true_positives * self . true_negatives ) - ( self . false_positives * self . false_negatives ) ) / ( sqrt ( ( self . true_positives + self . false_positives ) * ( self . true_positives + self . false_negatives ) * ( self . true_negatives + self . false_positives ) * ( self . true_negatives + self . false_negatives ) ) ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 )","title":"BinaryClassificationStats"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.accuracy","text":"Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. 
Returns: Name Type Description float float The Accuracy of the model, calculated as the sum of true positives and true negatives divided by float the sum of true positives, false positives, true negatives, and false negatives. float Returns 0.0 if the total sum of counts is zero. Source code in src/pheval/analyse/binary_classification_stats.py 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 def accuracy ( self ) -> float : \"\"\" Calculate Accuracy. Accuracy measures the proportion of correctly predicted instances out of all instances. Returns: float: The Accuracy of the model, calculated as the sum of true positives and true negatives divided by the sum of true positives, false positives, true negatives, and false negatives. Returns 0.0 if the total sum of counts is zero. \"\"\" return ( ( self . true_positives + self . true_negatives ) / ( self . true_positives + self . false_positives + self . true_negatives + self . false_negatives ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 )","title":"accuracy"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification","text":"Update binary classification metrics for known and unknown entities based on their ranks. Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. 
Source code in src/pheval/analyse/binary_classification_stats.py 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 def add_classification ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> None : \"\"\" Update binary classification metrics for known and unknown entities based on their ranks. Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" self . add_classification_for_known_entities ( relevant_ranks ) self . add_classification_for_other_entities ( self . remove_relevant_ranks ( pheval_results , relevant_ranks ) ) self . add_labels_and_scores ( pheval_results , relevant_ranks )","title":"add_classification"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_known_entities","text":"Update binary classification metrics for known entities based on their ranking. Parameters: Name Type Description Default relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Source code in src/pheval/analyse/binary_classification_stats.py 63 64 65 66 67 68 69 70 71 72 73 74 def add_classification_for_known_entities ( self , relevant_ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for known entities based on their ranking. Args: relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" for rank in relevant_ranks : if rank == 1 : self . true_positives += 1 elif rank != 1 : self . 
false_negatives += 1","title":"add_classification_for_known_entities"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_classification_for_other_entities","text":"Update binary classification metrics for other entities based on their ranking. Parameters: Name Type Description Default ranks List [ int ] A list of the ranks for all other entities. required Source code in src/pheval/analyse/binary_classification_stats.py 76 77 78 79 80 81 82 83 84 85 86 87 def add_classification_for_other_entities ( self , ranks : List [ int ]) -> None : \"\"\" Update binary classification metrics for other entities based on their ranking. Args: ranks (List[int]): A list of the ranks for all other entities. \"\"\" for rank in ranks : if rank == 1 : self . false_positives += 1 elif rank != 1 : self . true_negatives += 1","title":"add_classification_for_other_entities"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.add_labels_and_scores","text":"Adds scores and labels from the PhEval results. Parameters: Name Type Description Default relevant_ranks List [ int ] A list of the ranks associated with the known entities. required Source code in src/pheval/analyse/binary_classification_stats.py 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 def add_labels_and_scores ( self , pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ): \"\"\" Adds scores and labels from the PhEval results. Args: pheval_results (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): List of all PhEval results relevant_ranks (List[int]): A list of the ranks associated with the known entities. \"\"\" relevant_ranks_copy = relevant_ranks . 
copy () for result in pheval_results : self . scores . append ( result . score ) label = 1 if result . rank in relevant_ranks_copy else 0 self . labels . append ( label ) relevant_ranks_copy . remove ( result . rank ) if label == 1 else None","title":"add_labels_and_scores"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.f1_score","text":"Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: Name Type Description float float The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). float Returns 0.0 if the denominator is zero. Source code in src/pheval/analyse/binary_classification_stats.py 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 def f1_score ( self ) -> float : \"\"\" Calculate F1 Score. F1 Score is the harmonic mean of precision and recall, providing a balance between false positives and false negatives. Returns: float: The F1 Score of the model, calculated as 2 * TP / (2 * TP + FP + FN). Returns 0.0 if the denominator is zero. \"\"\" return ( ( 2 * self . true_positives ) / (( 2 * self . true_positives ) + self . false_positives + self . false_negatives ) if ( self . true_positives + self . false_positives + self . false_negatives ) > 0 else 0.0 )","title":"f1_score"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_discovery_rate","text":"Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: Name Type Description float float The False Discovery Rate of the model, calculated as false positives divided by the sum of float false positives and true positives. Returns 0.0 if both false positives and true positives are zero. 
Source code in src/pheval/analyse/binary_classification_stats.py 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 def false_discovery_rate ( self ) -> float : \"\"\" Calculate False Discovery Rate (FDR). FDR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Discovery Rate of the model, calculated as false positives divided by the sum of false positives and true positives. Returns 0.0 if both false positives and true positives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_positives ) if ( self . false_positives + self . true_positives ) > 0 else 0.0 )","title":"false_discovery_rate"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_negative_rate","text":"Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: Name Type Description float float The False Negative Rate of the model, calculated as false negatives divided by the sum of float false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 def false_negative_rate ( self ) -> float : \"\"\" Calculate False Negative Rate (FNR). FNR measures the proportion of instances that are actually positive but predicted as negative. Returns: float: The False Negative Rate of the model, calculated as false negatives divided by the sum of false negatives and true positives. Returns 0.0 if both false negatives and true positives are zero. \"\"\" return ( self . false_negatives / ( self . false_negatives + self . true_positives ) if ( self . false_negatives + self . 
true_positives ) > 0 else 0.0 )","title":"false_negative_rate"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.false_positive_rate","text":"Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. Returns: Name Type Description float float The False Positive Rate of the model, calculated as false positives divided by the sum of float false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 def false_positive_rate ( self ) -> float : \"\"\" Calculate False Positive Rate (FPR). FPR measures the proportion of instances predicted as positive that are actually negative. Returns: float: The False Positive Rate of the model, calculated as false positives divided by the sum of false positives and true negatives. Returns 0.0 if both false positives and true negatives are zero. \"\"\" return ( self . false_positives / ( self . false_positives + self . true_negatives ) if ( self . false_positives + self . true_negatives ) > 0 else 0.0 )","title":"false_positive_rate"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.matthews_correlation_coefficient","text":"Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: Name Type Description float float The Matthews Correlation Coefficient of the model, calculated as float ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). float Returns 0.0 if the denominator is zero. 
Source code in src/pheval/analyse/binary_classification_stats.py 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 def matthews_correlation_coefficient ( self ) -> float : \"\"\" Calculate Matthews Correlation Coefficient (MCC). MCC is a measure of the quality of binary classifications, accounting for imbalances in the data. Returns: float: The Matthews Correlation Coefficient of the model, calculated as ((TP * TN) - (FP * FN)) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)). Returns 0.0 if the denominator is zero. \"\"\" return ( ( ( self . true_positives * self . true_negatives ) - ( self . false_positives * self . false_negatives ) ) / ( sqrt ( ( self . true_positives + self . false_positives ) * ( self . true_positives + self . false_negatives ) * ( self . true_negatives + self . false_positives ) * ( self . true_negatives + self . false_negatives ) ) ) if ( self . true_positives + self . false_negatives + self . true_negatives + self . false_negatives ) > 0 else 0.0 )","title":"matthews_correlation_coefficient"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.negative_predictive_value","text":"Calculate Negative Predictive Value (NPV). NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: Name Type Description float float The Negative Predictive Value of the model, calculated as true negatives divided by the sum of float true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 def negative_predictive_value ( self ) -> float : \"\"\" Calculate Negative Predictive Value (NPV). 
NPV measures the proportion of correctly predicted negative instances out of all instances predicted negative. Returns: float: The Negative Predictive Value of the model, calculated as true negatives divided by the sum of true negatives and false negatives. Returns 0.0 if both true negatives and false negatives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . false_negatives ) if ( self . true_negatives + self . false_negatives ) > 0 else 0.0 )","title":"negative_predictive_value"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.precision","text":"Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: Name Type Description float float The precision of the model, calculated as true positives divided by the sum of true positives float and false positives. Returns 0.0 if both true positives and false positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 def precision ( self ) -> float : \"\"\" Calculate precision. Precision measures the proportion of correctly predicted positive instances out of all instances predicted as positive. Returns: float: The precision of the model, calculated as true positives divided by the sum of true positives and false positives. Returns 0.0 if both true positives and false positives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_positives ) if ( self . true_positives + self . 
false_positives ) > 0 else 0.0 )","title":"precision"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.remove_relevant_ranks","text":"Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: Type Description List [ int ] List[int]: A list of the ranks with the relevant entity ranks removed. Source code in src/pheval/analyse/binary_classification_stats.py 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 @staticmethod def remove_relevant_ranks ( pheval_results : Union [ List [ RankedPhEvalGeneResult ], List [ RankedPhEvalVariantResult ], List [ RankedPhEvalDiseaseResult ], ], relevant_ranks : List [ int ], ) -> List [ int ]: \"\"\" Remove the relevant entity ranks from all result ranks Args: pheval_results: (Union[List[RankedPhEvalGeneResult], List[RankedPhEvalVariantResult], List[RankedPhEvalDiseaseResult]]): The list of all pheval results. relevant_ranks (List[int]): A list of the ranks associated with the known entities. Returns: List[int]: A list of the ranks with the relevant entity ranks removed. \"\"\" all_result_ranks = [ pheval_result . rank for pheval_result in pheval_results ] for rank in relevant_ranks : if rank in all_result_ranks : all_result_ranks . remove ( rank ) continue return all_result_ranks","title":"remove_relevant_ranks"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.sensitivity","text":"Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. 
Returns: Name Type Description float float The sensitivity of the model, calculated as true positives divided by the sum of true positives float and false negatives. Returns 0 if both true positives and false negatives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 def sensitivity ( self ) -> float : \"\"\" Calculate sensitivity. Sensitivity measures the proportion of actual positive instances correctly identified by the model. Returns: float: The sensitivity of the model, calculated as true positives divided by the sum of true positives and false negatives. Returns 0 if both true positives and false negatives are zero. \"\"\" return ( self . true_positives / ( self . true_positives + self . false_negatives ) if ( self . true_positives + self . false_negatives ) > 0 else 0.0 )","title":"sensitivity"},{"location":"api/pheval/analyse/binary_classification_stats/#src.pheval.analyse.binary_classification_stats.BinaryClassificationStats.specificity","text":"Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: Name Type Description float float The specificity of the model, calculated as true negatives divided by the sum of true negatives float and false positives. Returns 0.0 if both true negatives and false positives are zero. Source code in src/pheval/analyse/binary_classification_stats.py 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 def specificity ( self ) -> float : \"\"\" Calculate specificity. Specificity measures the proportion of actual negative instances correctly identified by the model. Returns: float: The specificity of the model, calculated as true negatives divided by the sum of true negatives and false positives. Returns 0.0 if both true negatives and false positives are zero. \"\"\" return ( self . true_negatives / ( self . true_negatives + self . 
false_positives ) if ( self . true_negatives + self . false_positives ) > 0 else 0.0 )","title":"specificity"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/","text":"AssessDiseasePrioritisation Class for assessing disease prioritisation based on thresholds and scoring orders. Source code in src/pheval/analyse/disease_prioritisation_analysis.py 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 class AssessDiseasePrioritisation : \"\"\"Class for assessing disease prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_disease_results : List [ RankedPhEvalDiseaseResult ], threshold : float , score_order : str , proband_diseases : List [ ProbandDisease ], ): \"\"\" Initialise AssessDiseasePrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_diseases (List[ProbandDisease]): List of proband diseases \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_disease_results = standardised_disease_results self . threshold = threshold self . 
score_order = score_order self . proband_diseases = proband_diseases def _record_disease_prioritisation_match ( self , disease : ProbandDisease , result_entry : RankedPhEvalDiseaseResult , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if found within the results Args: disease (ProbandDisease): Diagnosed proband disease result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return DiseasePrioritisationResult ( self . phenopacket_path , disease , rank ) def _assess_disease_with_threshold_ascending_order ( self , result_entry : RankedPhEvalDiseaseResult , disease : ProbandDisease , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if it meets the ascending order threshold. This method checks if the disease prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the disease rank. Args: result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_disease_prioritisation_match ( disease , result_entry , rank_stats ) def _assess_disease_with_threshold ( self , result_entry : RankedPhEvalDiseaseResult , disease : ProbandDisease , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if it meets the score threshold. This method checks if the disease prioritisation rank meets the score threshold. 
If the score of the result entry is greater than the threshold, it records the disease rank. Args: result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_disease_prioritisation_match ( disease , result_entry , rank_stats ) def _record_matched_disease ( self , disease : ProbandDisease , rank_stats : RankStats , standardised_disease_result : RankedPhEvalDiseaseResult , ) -> DiseasePrioritisationResult : \"\"\" Return the disease rank result - handling the specification of a threshold. This method determines and returns the disease rank result based on the specified threshold and score order. If the threshold is 0.0, it records the disease rank directly. Otherwise, it assesses the disease with the threshold based on the score order. Args: disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance standardised_disease_result (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_disease_prioritisation_match ( disease , standardised_disease_result , rank_stats ) else : return ( self . _assess_disease_with_threshold ( standardised_disease_result , disease , rank_stats ) if self . score_order != \"ascending\" else self . _assess_disease_with_threshold_ascending_order ( standardised_disease_result , disease , rank_stats ) ) def assess_disease_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation. 
This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for disease in self . proband_diseases : rank_stats . total += 1 disease_match = DiseasePrioritisationResult ( self . phenopacket_path , disease ) for standardised_disease_result in self . standardised_disease_results : if ( disease . disease_identifier == standardised_disease_result . disease_identifier or disease . disease_name == standardised_disease_result . disease_name ): disease_match = self . _record_matched_disease ( disease , rank_stats , standardised_disease_result ) ( relevant_ranks . append ( disease_match . rank ) if disease_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( DiseasePrioritisationResult ( self . phenopacket_path , disease ) if disease_match is None else disease_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . 
standardised_disease_results , relevant_ranks ) __init__ ( phenopacket_path , results_dir , standardised_disease_results , threshold , score_order , proband_diseases ) Initialise AssessDiseasePrioritisation class Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_disease_results List [ RankedPhEvalDiseaseResult ] List of ranked PhEval disease results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_diseases List [ ProbandDisease ] List of proband diseases required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_disease_results : List [ RankedPhEvalDiseaseResult ], threshold : float , score_order : str , proband_diseases : List [ ProbandDisease ], ): \"\"\" Initialise AssessDiseasePrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_diseases (List[ProbandDisease]): List of proband diseases \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_disease_results = standardised_disease_results self . threshold = threshold self . score_order = score_order self . proband_diseases = proband_diseases assess_disease_prioritisation ( rank_stats , rank_records , binary_classification_stats ) Assess disease prioritisation. 
This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 def assess_disease_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation. This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for disease in self . proband_diseases : rank_stats . total += 1 disease_match = DiseasePrioritisationResult ( self . phenopacket_path , disease ) for standardised_disease_result in self . standardised_disease_results : if ( disease . disease_identifier == standardised_disease_result . disease_identifier or disease . disease_name == standardised_disease_result . disease_name ): disease_match = self . _record_matched_disease ( disease , rank_stats , standardised_disease_result ) ( relevant_ranks . append ( disease_match . rank ) if disease_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( DiseasePrioritisationResult ( self . 
phenopacket_path , disease ) if disease_match is None else disease_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . standardised_disease_results , relevant_ranks ) assess_phenopacket_disease_prioritisation ( phenopacket_path , score_order , results_dir_and_input , threshold , disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats ) Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required disease_rank_stats RankStats RankStats class instance. required disease_rank_comparison defaultdict Default dictionary for disease rank comparisons. required disease_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 def assess_phenopacket_disease_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , disease_rank_stats : RankStats , disease_rank_comparison : defaultdict , disease_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket. 
Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. disease_rank_stats (RankStats): RankStats class instance. disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons. disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" standardised_disease_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_disease_results/ { phenopacket_path . stem } -pheval_disease_result.tsv\" ) pheval_disease_result = read_standardised_result ( standardised_disease_result ) proband_diseases = _obtain_causative_diseases ( phenopacket_path ) AssessDiseasePrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_disease_results/\" ), parse_pheval_result ( RankedPhEvalDiseaseResult , pheval_disease_result ), threshold , score_order , proband_diseases , ) . assess_disease_prioritisation ( disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats ) benchmark_disease_prioritisation ( results_directory_and_input , score_order , threshold , disease_rank_comparison ) Benchmark a directory based on disease prioritisation results. Parameters: Name Type Description Default results_directory_and_input TrackInputOutputDirectories Input and output directories. required score_order str The order in which scores are arranged. required threshold float Threshold for assessment. required disease_rank_comparison defaultdict Default dictionary for disease rank comparisons. required Returns: Name Type Description BenchmarkRunResults An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory. 
Source code in src/pheval/analyse/disease_prioritisation_analysis.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 def benchmark_disease_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , disease_rank_comparison : defaultdict , ): \"\"\" Benchmark a directory based on disease prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory. \"\"\" disease_rank_stats = RankStats () disease_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_disease_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = disease_rank_comparison , rank_stats = disease_rank_stats , binary_classification_stats = disease_binary_classification_stats , )","title":"Disease prioritisation analysis"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation","text":"Class for assessing disease prioritisation based on thresholds and scoring orders. 
Source code in src/pheval/analyse/disease_prioritisation_analysis.py 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 class AssessDiseasePrioritisation : \"\"\"Class for assessing disease prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_disease_results : List [ RankedPhEvalDiseaseResult ], threshold : float , score_order : str , proband_diseases : List [ ProbandDisease ], ): \"\"\" Initialise AssessDiseasePrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_diseases (List[ProbandDisease]): List of proband diseases \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_disease_results = standardised_disease_results self . threshold = threshold self . score_order = score_order self . 
proband_diseases = proband_diseases def _record_disease_prioritisation_match ( self , disease : ProbandDisease , result_entry : RankedPhEvalDiseaseResult , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if found within the results Args: disease (ProbandDisease): Diagnosed proband disease result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return DiseasePrioritisationResult ( self . phenopacket_path , disease , rank ) def _assess_disease_with_threshold_ascending_order ( self , result_entry : RankedPhEvalDiseaseResult , disease : ProbandDisease , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if it meets the ascending order threshold. This method checks if the disease prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the disease rank. Args: result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_disease_prioritisation_match ( disease , result_entry , rank_stats ) def _assess_disease_with_threshold ( self , result_entry : RankedPhEvalDiseaseResult , disease : ProbandDisease , rank_stats : RankStats , ) -> DiseasePrioritisationResult : \"\"\" Record the disease prioritisation rank if it meets the score threshold. This method checks if the disease prioritisation rank meets the score threshold. 
If the score of the result entry is greater than the threshold, it records the disease rank. Args: result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_disease_prioritisation_match ( disease , result_entry , rank_stats ) def _record_matched_disease ( self , disease : ProbandDisease , rank_stats : RankStats , standardised_disease_result : RankedPhEvalDiseaseResult , ) -> DiseasePrioritisationResult : \"\"\" Return the disease rank result - handling the specification of a threshold. This method determines and returns the disease rank result based on the specified threshold and score order. If the threshold is 0.0, it records the disease rank directly. Otherwise, it assesses the disease with the threshold based on the score order. Args: disease (ProbandDisease): Diagnosed proband disease rank_stats (RankStats): RankStats class instance standardised_disease_result (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry Returns: DiseasePrioritisationResult: Recorded correct disease prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_disease_prioritisation_match ( disease , standardised_disease_result , rank_stats ) else : return ( self . _assess_disease_with_threshold ( standardised_disease_result , disease , rank_stats ) if self . score_order != \"ascending\" else self . _assess_disease_with_threshold_ascending_order ( standardised_disease_result , disease , rank_stats ) ) def assess_disease_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation. 
This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for disease in self . proband_diseases : rank_stats . total += 1 disease_match = DiseasePrioritisationResult ( self . phenopacket_path , disease ) for standardised_disease_result in self . standardised_disease_results : if ( disease . disease_identifier == standardised_disease_result . disease_identifier or disease . disease_name == standardised_disease_result . disease_name ): disease_match = self . _record_matched_disease ( disease , rank_stats , standardised_disease_result ) ( relevant_ranks . append ( disease_match . rank ) if disease_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( DiseasePrioritisationResult ( self . phenopacket_path , disease ) if disease_match is None else disease_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . 
standardised_disease_results , relevant_ranks )","title":"AssessDiseasePrioritisation"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.__init__","text":"Initialise AssessDiseasePrioritisation class Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_disease_results List [ RankedPhEvalDiseaseResult ] List of ranked PhEval disease results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_diseases List [ ProbandDisease ] List of proband diseases required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_disease_results : List [ RankedPhEvalDiseaseResult ], threshold : float , score_order : str , proband_diseases : List [ ProbandDisease ], ): \"\"\" Initialise AssessDiseasePrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_diseases (List[ProbandDisease]): List of proband diseases \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_disease_results = standardised_disease_results self . threshold = threshold self . score_order = score_order self . 
proband_diseases = proband_diseases","title":"__init__"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.AssessDiseasePrioritisation.assess_disease_prioritisation","text":"Assess disease prioritisation. This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 def assess_disease_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation. This method assesses the prioritisation of diseases based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for disease in self . proband_diseases : rank_stats . total += 1 disease_match = DiseasePrioritisationResult ( self . phenopacket_path , disease ) for standardised_disease_result in self . standardised_disease_results : if ( disease . disease_identifier == standardised_disease_result . disease_identifier or disease . disease_name == standardised_disease_result . disease_name ): disease_match = self . 
_record_matched_disease ( disease , rank_stats , standardised_disease_result ) ( relevant_ranks . append ( disease_match . rank ) if disease_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( DiseasePrioritisationResult ( self . phenopacket_path , disease ) if disease_match is None else disease_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . standardised_disease_results , relevant_ranks )","title":"assess_disease_prioritisation"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.assess_phenopacket_disease_prioritisation","text":"Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required disease_rank_stats RankStats RankStats class instance. required disease_rank_comparison defaultdict Default dictionary for disease rank comparisons. required disease_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. 
required Source code in src/pheval/analyse/disease_prioritisation_analysis.py 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 def assess_phenopacket_disease_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , disease_rank_stats : RankStats , disease_rank_comparison : defaultdict , disease_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results against the recorded causative diseases for a proband in the Phenopacket. Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. disease_rank_stats (RankStats): RankStats class instance. disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons. disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" standardised_disease_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_disease_results/ { phenopacket_path . stem } -pheval_disease_result.tsv\" ) pheval_disease_result = read_standardised_result ( standardised_disease_result ) proband_diseases = _obtain_causative_diseases ( phenopacket_path ) AssessDiseasePrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_disease_results/\" ), parse_pheval_result ( RankedPhEvalDiseaseResult , pheval_disease_result ), threshold , score_order , proband_diseases , ) . 
assess_disease_prioritisation ( disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats )","title":"assess_phenopacket_disease_prioritisation"},{"location":"api/pheval/analyse/disease_prioritisation_analysis/#src.pheval.analyse.disease_prioritisation_analysis.benchmark_disease_prioritisation","text":"Benchmark a directory based on disease prioritisation results. Parameters: Name Type Description Default results_directory_and_input TrackInputOutputDirectories Input and output directories. required score_order str The order in which scores are arranged. required threshold float Threshold for assessment. required disease_rank_comparison defaultdict Default dictionary for disease rank comparisons. required Returns: Name Type Description BenchmarkRunResults An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory. Source code in src/pheval/analyse/disease_prioritisation_analysis.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 def benchmark_disease_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , disease_rank_comparison : defaultdict , ): \"\"\" Benchmark a directory based on disease prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for disease prioritisation, including ranks and rank statistics for the benchmarked directory. 
\"\"\" disease_rank_stats = RankStats () disease_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_disease_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , disease_rank_stats , disease_rank_comparison , disease_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = disease_rank_comparison , rank_stats = disease_rank_stats , binary_classification_stats = disease_binary_classification_stats , )","title":"benchmark_disease_prioritisation"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/","text":"AssessGenePrioritisation Class for assessing gene prioritisation based on thresholds and scoring orders. Source code in src/pheval/analyse/gene_prioritisation_analysis.py 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 class AssessGenePrioritisation : \"\"\"Class for assessing gene prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_gene_results : List [ RankedPhEvalGeneResult ], threshold : float , score_order : str , proband_causative_genes : List [ ProbandCausativeGene ], 
): \"\"\" Initialise AssessGenePrioritisation class. Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_gene_results = standardised_gene_results self . threshold = threshold self . score_order = score_order self . proband_causative_genes = proband_causative_genes def _record_gene_prioritisation_match ( self , gene : ProbandCausativeGene , result_entry : RankedPhEvalGeneResult , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if found within the results Args: gene (ProbandCausativeGene): Diagnosed proband gene result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol , rank ) def _assess_gene_with_threshold_ascending_order ( self , result_entry : RankedPhEvalGeneResult , gene : ProbandCausativeGene , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if it meets the ascending order threshold. This method checks if the gene prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the gene rank. 
Args: result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_gene_prioritisation_match ( gene , result_entry , rank_stats ) def _assess_gene_with_threshold ( self , result_entry : RankedPhEvalGeneResult , gene : ProbandCausativeGene , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if it meets the score threshold. This method checks if the gene prioritisation rank meets the score threshold. If the score of the result entry is greater than the threshold, it records the gene rank. Args: result_entry (RankedPhEvalResult): Ranked PhEval gene result entry gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_gene_prioritisation_match ( gene , result_entry , rank_stats ) def _record_matched_gene ( self , gene : ProbandCausativeGene , rank_stats : RankStats , standardised_gene_result : RankedPhEvalGeneResult , ) -> GenePrioritisationResult : \"\"\" Return the gene rank result - handling the specification of a threshold. This method determines and returns the gene rank result based on the specified threshold and score order. If the threshold is 0.0, it records the gene rank directly. Otherwise, it assesses the gene with the threshold based on the score order. 
Args: gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance standardised_gene_result (RankedPhEvalGeneResult): Ranked PhEval gene result entry Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_gene_prioritisation_match ( gene , standardised_gene_result , rank_stats ) else : return ( self . _assess_gene_with_threshold ( standardised_gene_result , gene , rank_stats ) if self . score_order != \"ascending\" else self . _assess_gene_with_threshold_ascending_order ( standardised_gene_result , gene , rank_stats ) ) @staticmethod def _check_string_representation ( entity : str ) -> Union [ List [ str ], str ]: \"\"\" Check if the input string is a representation of a list and returns the list if true, otherwise the string. Args: entity (str): The input entity to check. Returns: Union[List[str], str]: A list if the input string is a list representation, otherwise the original string. \"\"\" list_pattern = re . compile ( r \"^\\[\\s*(?:[^\\[\\],\\s]+(?:\\s*,\\s*[^\\[\\],\\s]+)*)?\\s*\\]$\" ) if list_pattern . match ( str ( entity )): return ast . literal_eval ( entity ) else : return entity def assess_gene_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for gene in self . proband_causative_genes : rank_stats . total += 1 gene_match = GenePrioritisationResult ( self . phenopacket_path , gene . 
gene_symbol ) for standardised_gene_result in self . standardised_gene_results : gene_identifier = self . _check_string_representation ( standardised_gene_result . gene_identifier ) gene_symbol = self . _check_string_representation ( standardised_gene_result . gene_symbol ) if ( isinstance ( gene_identifier , list ) and gene . gene_identifier in gene_identifier or isinstance ( gene_identifier , str ) and gene . gene_identifier == str or isinstance ( gene_symbol , list ) and gene . gene_symbol in gene_symbol or isinstance ( gene_symbol , str ) and gene . gene_symbol == gene_symbol ): gene_match = self . _record_matched_gene ( gene , rank_stats , standardised_gene_result ) ( relevant_ranks . append ( gene_match . rank ) if gene_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) if gene_match is None else gene_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( pheval_results = self . standardised_gene_results , relevant_ranks = relevant_ranks ) __init__ ( phenopacket_path , results_dir , standardised_gene_results , threshold , score_order , proband_causative_genes ) Initialise AssessGenePrioritisation class. 
Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_gene_results List [ RankedPhEvalGeneResult ] List of ranked PhEval gene results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_causative_genes List [ ProbandCausativeGene ] List of proband causative genes required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_gene_results : List [ RankedPhEvalGeneResult ], threshold : float , score_order : str , proband_causative_genes : List [ ProbandCausativeGene ], ): \"\"\" Initialise AssessGenePrioritisation class. Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_gene_results = standardised_gene_results self . threshold = threshold self . score_order = score_order self . proband_causative_genes = proband_causative_genes assess_gene_prioritisation ( rank_stats , rank_records , binary_classification_stats ) Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. 
required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 def assess_gene_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for gene in self . proband_causative_genes : rank_stats . total += 1 gene_match = GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) for standardised_gene_result in self . standardised_gene_results : gene_identifier = self . _check_string_representation ( standardised_gene_result . gene_identifier ) gene_symbol = self . _check_string_representation ( standardised_gene_result . gene_symbol ) if ( isinstance ( gene_identifier , list ) and gene . gene_identifier in gene_identifier or isinstance ( gene_identifier , str ) and gene . gene_identifier == str or isinstance ( gene_symbol , list ) and gene . gene_symbol in gene_symbol or isinstance ( gene_symbol , str ) and gene . gene_symbol == gene_symbol ): gene_match = self . _record_matched_gene ( gene , rank_stats , standardised_gene_result ) ( relevant_ranks . append ( gene_match . rank ) if gene_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . 
results_dir , ( GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) if gene_match is None else gene_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( pheval_results = self . standardised_gene_results , relevant_ranks = relevant_ranks ) assess_phenopacket_gene_prioritisation ( phenopacket_path , score_order , results_dir_and_input , threshold , gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats ) Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results against the recorded causative genes for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required gene_rank_stats RankStats RankStats class instance. required gene_rank_comparison defaultdict Default dictionary for gene rank comparisons. required gene_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 def assess_phenopacket_gene_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , gene_rank_stats : RankStats , gene_rank_comparison : defaultdict , gene_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results against the recorded causative genes for a proband in the Phenopacket. 
Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. gene_rank_stats (RankStats): RankStats class instance. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" standardised_gene_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_gene_results/ { phenopacket_path . stem } -pheval_gene_result.tsv\" ) pheval_gene_result = read_standardised_result ( standardised_gene_result ) proband_causative_genes = _obtain_causative_genes ( phenopacket_path ) AssessGenePrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_gene_results/\" ), parse_pheval_result ( RankedPhEvalGeneResult , pheval_gene_result ), threshold , score_order , proband_causative_genes , ) . assess_gene_prioritisation ( gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats ) benchmark_gene_prioritisation ( results_directory_and_input , score_order , threshold , gene_rank_comparison ) Benchmark a directory based on gene prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for gene prioritisation, including ranks and rank statistics for the benchmarked directory. 
Source code in src/pheval/analyse/gene_prioritisation_analysis.py 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 def benchmark_gene_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , gene_rank_comparison : defaultdict , ) -> BenchmarkRunResults : \"\"\" Benchmark a directory based on gene prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for gene prioritisation, including ranks and rank statistics for the benchmarked directory. \"\"\" gene_rank_stats = RankStats () gene_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_gene_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = gene_rank_comparison , rank_stats = gene_rank_stats , binary_classification_stats = gene_binary_classification_stats , )","title":"Gene prioritisation analysis"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation","text":"Class for assessing gene prioritisation based on thresholds and scoring orders. 
Source code in src/pheval/analyse/gene_prioritisation_analysis.py 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 class AssessGenePrioritisation : \"\"\"Class for assessing gene prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_gene_results : List [ RankedPhEvalGeneResult ], threshold : float , score_order : str , proband_causative_genes : List [ ProbandCausativeGene ], ): \"\"\" Initialise AssessGenePrioritisation class. Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_gene_results = standardised_gene_results self . threshold = threshold self . score_order = score_order self . 
proband_causative_genes = proband_causative_genes def _record_gene_prioritisation_match ( self , gene : ProbandCausativeGene , result_entry : RankedPhEvalGeneResult , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if found within the results Args: gene (ProbandCausativeGene): Diagnosed proband gene result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol , rank ) def _assess_gene_with_threshold_ascending_order ( self , result_entry : RankedPhEvalGeneResult , gene : ProbandCausativeGene , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if it meets the ascending order threshold. This method checks if the gene prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the gene rank. Args: result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_gene_prioritisation_match ( gene , result_entry , rank_stats ) def _assess_gene_with_threshold ( self , result_entry : RankedPhEvalGeneResult , gene : ProbandCausativeGene , rank_stats : RankStats , ) -> GenePrioritisationResult : \"\"\" Record the gene prioritisation rank if it meets the score threshold. This method checks if the gene prioritisation rank meets the score threshold. If the score of the result entry is greater than the threshold, it records the gene rank. 
Args: result_entry (RankedPhEvalResult): Ranked PhEval gene result entry gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_gene_prioritisation_match ( gene , result_entry , rank_stats ) def _record_matched_gene ( self , gene : ProbandCausativeGene , rank_stats : RankStats , standardised_gene_result : RankedPhEvalGeneResult , ) -> GenePrioritisationResult : \"\"\" Return the gene rank result - handling the specification of a threshold. This method determines and returns the gene rank result based on the specified threshold and score order. If the threshold is 0.0, it records the gene rank directly. Otherwise, it assesses the gene with the threshold based on the score order. Args: gene (ProbandCausativeGene): Diagnosed proband gene rank_stats (RankStats): RankStats class instance standardised_gene_result (RankedPhEvalGeneResult): Ranked PhEval gene result entry Returns: GenePrioritisationResult: Recorded correct gene prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_gene_prioritisation_match ( gene , standardised_gene_result , rank_stats ) else : return ( self . _assess_gene_with_threshold ( standardised_gene_result , gene , rank_stats ) if self . score_order != \"ascending\" else self . _assess_gene_with_threshold_ascending_order ( standardised_gene_result , gene , rank_stats ) ) @staticmethod def _check_string_representation ( entity : str ) -> Union [ List [ str ], str ]: \"\"\" Check if the input string is a representation of a list and returns the list if true, otherwise the string. Args: entity (str): The input entity to check. Returns: Union[List[str], str]: A list if the input string is a list representation, otherwise the original string. \"\"\" list_pattern = re . 
compile ( r \"^\\[\\s*(?:[^\\[\\],\\s]+(?:\\s*,\\s*[^\\[\\],\\s]+)*)?\\s*\\]$\" ) if list_pattern . match ( str ( entity )): return ast . literal_eval ( entity ) else : return entity def assess_gene_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for gene in self . proband_causative_genes : rank_stats . total += 1 gene_match = GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) for standardised_gene_result in self . standardised_gene_results : gene_identifier = self . _check_string_representation ( standardised_gene_result . gene_identifier ) gene_symbol = self . _check_string_representation ( standardised_gene_result . gene_symbol ) if ( isinstance ( gene_identifier , list ) and gene . gene_identifier in gene_identifier or isinstance ( gene_identifier , str ) and gene . gene_identifier == str or isinstance ( gene_symbol , list ) and gene . gene_symbol in gene_symbol or isinstance ( gene_symbol , str ) and gene . gene_symbol == gene_symbol ): gene_match = self . _record_matched_gene ( gene , rank_stats , standardised_gene_result ) ( relevant_ranks . append ( gene_match . rank ) if gene_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) if gene_match is None else gene_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . 
append ( relevant_ranks ) binary_classification_stats . add_classification ( pheval_results = self . standardised_gene_results , relevant_ranks = relevant_ranks )","title":"AssessGenePrioritisation"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.__init__","text":"Initialise AssessGenePrioritisation class. Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_gene_results List [ RankedPhEvalGeneResult ] List of ranked PhEval gene results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_causative_genes List [ ProbandCausativeGene ] List of proband causative genes required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_gene_results : List [ RankedPhEvalGeneResult ], threshold : float , score_order : str , proband_causative_genes : List [ ProbandCausativeGene ], ): \"\"\" Initialise AssessGenePrioritisation class. Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_gene_results = standardised_gene_results self . threshold = threshold self . score_order = score_order self . 
proband_causative_genes = proband_causative_genes","title":"__init__"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.AssessGenePrioritisation.assess_gene_prioritisation","text":"Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 def assess_gene_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation. This method assesses the prioritisation of genes based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for gene in self . proband_causative_genes : rank_stats . total += 1 gene_match = GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) for standardised_gene_result in self . standardised_gene_results : gene_identifier = self . _check_string_representation ( standardised_gene_result . gene_identifier ) gene_symbol = self . _check_string_representation ( standardised_gene_result . 
gene_symbol ) if ( isinstance ( gene_identifier , list ) and gene . gene_identifier in gene_identifier or isinstance ( gene_identifier , str ) and gene . gene_identifier == str or isinstance ( gene_symbol , list ) and gene . gene_symbol in gene_symbol or isinstance ( gene_symbol , str ) and gene . gene_symbol == gene_symbol ): gene_match = self . _record_matched_gene ( gene , rank_stats , standardised_gene_result ) ( relevant_ranks . append ( gene_match . rank ) if gene_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( GenePrioritisationResult ( self . phenopacket_path , gene . gene_symbol ) if gene_match is None else gene_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( pheval_results = self . standardised_gene_results , relevant_ranks = relevant_ranks )","title":"assess_gene_prioritisation"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.assess_phenopacket_gene_prioritisation","text":"Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results against the recorded causative genes for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required gene_rank_stats RankStats RankStats class instance. required gene_rank_comparison defaultdict Default dictionary for gene rank comparisons. required gene_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. 
required Source code in src/pheval/analyse/gene_prioritisation_analysis.py 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 def assess_phenopacket_gene_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , gene_rank_stats : RankStats , gene_rank_comparison : defaultdict , gene_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results against the recorded causative genes for a proband in the Phenopacket. Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. gene_rank_stats (RankStats): RankStats class instance. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" standardised_gene_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_gene_results/ { phenopacket_path . stem } -pheval_gene_result.tsv\" ) pheval_gene_result = read_standardised_result ( standardised_gene_result ) proband_causative_genes = _obtain_causative_genes ( phenopacket_path ) AssessGenePrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_gene_results/\" ), parse_pheval_result ( RankedPhEvalGeneResult , pheval_gene_result ), threshold , score_order , proband_causative_genes , ) . 
assess_gene_prioritisation ( gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats )","title":"assess_phenopacket_gene_prioritisation"},{"location":"api/pheval/analyse/gene_prioritisation_analysis/#src.pheval.analyse.gene_prioritisation_analysis.benchmark_gene_prioritisation","text":"Benchmark a directory based on gene prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for gene prioritisation, including ranks and rank statistics for the benchmarked directory. Source code in src/pheval/analyse/gene_prioritisation_analysis.py 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 def benchmark_gene_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , gene_rank_comparison : defaultdict , ) -> BenchmarkRunResults : \"\"\" Benchmark a directory based on gene prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for gene prioritisation, including ranks and rank statistics for the benchmarked directory. \"\"\" gene_rank_stats = RankStats () gene_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . 
phenopacket_dir ): assess_phenopacket_gene_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , gene_rank_stats , gene_rank_comparison , gene_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = gene_rank_comparison , rank_stats = gene_rank_stats , binary_classification_stats = gene_binary_classification_stats , )","title":"benchmark_gene_prioritisation"},{"location":"api/pheval/analyse/generate_plots/","text":"PlotGenerator Class to generate plots. Source code in src/pheval/analyse/generate_plots.py 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 
398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 class PlotGenerator : \"\"\"Class to generate plots.\"\"\" palette_hex_codes = [ \"#f4ae3d\" , \"#ee5825\" , \"#2b7288\" , \"#9a84b2\" , \"#0c604c\" , \"#c94c4c\" , \"#3d8e83\" , \"#725ac1\" , \"#e7ba52\" , \"#1b9e77\" , ] def __init__ ( self , ): \"\"\" Initialise the PlotGenerator class. Note: `self.stats` will be used to store statistics data. `self.mrr` will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. \"\"\" self . stats , self . mrr = [], [] matplotlib . rcParams [ \"axes.spines.right\" ] = False matplotlib . rcParams [ \"axes.spines.top\" ] = False @staticmethod def _create_run_identifier ( results_dir : Path ) -> str : \"\"\" Create a run identifier from a path. Args: results_dir (Path): The directory path for results. Returns: str: A string representing the run identifier created from the given path. \"\"\" return f \" { Path ( results_dir ) . parents [ 0 ] . name } _ { trim_corpus_results_directory_suffix ( Path ( results_dir ) . name ) } \" def return_benchmark_name ( self , benchmark_result : BenchmarkRunResults ) -> str : \"\"\" Return the benchmark name for a run. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. Returns: str: The benchmark name obtained from the given BenchmarkRunResults instance. \"\"\" return ( benchmark_result . benchmark_name if benchmark_result . results_dir is None else self . _create_run_identifier ( benchmark_result . 
results_dir ) ) def _generate_stacked_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> None : \"\"\" Generate data in the correct format for dataframe creation for a stacked bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats self . stats . append ( { \"Run\" : self . return_benchmark_name ( benchmark_result ), \"Top\" : benchmark_result . rank_stats . percentage_top (), \"2-3\" : rank_stats . percentage_difference ( rank_stats . percentage_top3 (), rank_stats . percentage_top () ), \"4-5\" : rank_stats . percentage_difference ( rank_stats . percentage_top5 (), rank_stats . percentage_top3 () ), \"6-10\" : rank_stats . percentage_difference ( rank_stats . percentage_top10 (), rank_stats . percentage_top5 () ), \">10\" : rank_stats . percentage_difference ( rank_stats . percentage_found (), rank_stats . percentage_top10 () ), \"Missed\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found ()), } ) def _generate_stats_mrr_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> None : \"\"\" Generate data in the correct format for dataframe creation for MRR (Mean Reciprocal Rank) bar plot, appending to the self.mrr attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" self . mrr . extend ( [ { \"Rank\" : \"MRR\" , \"Percentage\" : benchmark_result . rank_stats . return_mean_reciprocal_rank (), \"Run\" : self . return_benchmark_name ( benchmark_result ), } ] ) def generate_stacked_bar_plot ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. 
benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_stacked_bar_plot_data ( benchmark_result ) self . _generate_stats_mrr_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) stats_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , stacked = True , color = self . palette_hex_codes , ylabel = benchmark_generator . y_label , edgecolor = \"white\" , ) . legend ( loc = \"center left\" , bbox_to_anchor = ( 1.0 , 0.5 )) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 100 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) mrr_df = pd . DataFrame ( self . mrr ) mrr_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , color = self . palette_hex_codes , ylabel = f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } mean reciprocal rank\" , legend = False , edgecolor = \"white\" , ) plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } results - mean reciprocal rank\" ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _mrr.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def _generate_cumulative_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ): \"\"\" Generate data in the correct format for dataframe creation for a cumulative bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats run_identifier = self . 
return_benchmark_name ( benchmark_result ) self . stats . extend ( [ { \"Rank\" : \"Top\" , \"Percentage\" : rank_stats . percentage_top () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top3\" , \"Percentage\" : rank_stats . percentage_top3 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top5\" , \"Percentage\" : rank_stats . percentage_top5 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top10\" , \"Percentage\" : rank_stats . percentage_top10 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Found\" , \"Percentage\" : rank_stats . percentage_found () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Missed\" , \"Percentage\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"MRR\" , \"Percentage\" : rank_stats . return_mean_reciprocal_rank (), \"Run\" : run_identifier , }, ] ) def generate_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . 
prioritisation_type_file_prefix . capitalize () } Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def _generate_non_cumulative_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> [ dict ]: \"\"\" Generate data in the correct format for dataframe creation for a non-cumulative bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats run_identifier = self . return_benchmark_name ( benchmark_result ) self . stats . extend ( [ { \"Rank\" : \"Top\" , \"Percentage\" : rank_stats . percentage_top () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"2-3\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top3 (), rank_stats . percentage_top () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"4-5\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top5 (), rank_stats . percentage_top3 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"6-10\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top10 (), rank_stats . percentage_top5 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \">10\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_found (), rank_stats . percentage_top10 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Missed\" , \"Percentage\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"MRR\" , \"Percentage\" : rank_stats . 
return_mean_reciprocal_rank (), \"Run\" : run_identifier , }, ] ) def generate_roc_curve ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" for i , benchmark_result in enumerate ( benchmarking_results ): fpr , tpr , thresh = roc_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , pos_label = 1 , ) roc_auc = auc ( fpr , tpr ) plt . plot ( fpr , tpr , label = f \" { self . return_benchmark_name ( benchmark_result ) } ROC Curve (AUC = { roc_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"False Positive Rate\" ) plt . ylabel ( \"True Positive Rate\" ) plt . title ( \"Receiver Operating Characteristic (ROC) Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _roc_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def generate_precision_recall ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Precision-Recall curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" plt . 
figure () for i , benchmark_result in enumerate ( benchmarking_results ): precision , recall , thresh = precision_recall_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , ) precision_recall_auc = auc ( recall , precision ) plt . plot ( recall , precision , label = f \" { self . return_benchmark_name ( benchmark_result ) } Precision-Recall Curve \" f \"(AUC = { precision_recall_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"Recall\" ) plt . ylabel ( \"Precision\" ) plt . title ( \"Precision-Recall Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _precision_recall_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def generate_non_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a non-cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_non_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . 
prioritisation_type_file_prefix . capitalize () } Non-Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) __init__ () Initialise the PlotGenerator class. Note: self.stats will be used to store statistics data. self.mrr will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. Source code in src/pheval/analyse/generate_plots.py 53 54 55 56 57 58 59 60 61 62 63 64 65 66 def __init__ ( self , ): \"\"\" Initialise the PlotGenerator class. Note: `self.stats` will be used to store statistics data. `self.mrr` will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. \"\"\" self . stats , self . mrr = [], [] matplotlib . rcParams [ \"axes.spines.right\" ] = False matplotlib . rcParams [ \"axes.spines.top\" ] = False generate_cumulative_bar ( benchmarking_results , benchmark_generator , title = None ) Generate a cumulative bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 def generate_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a cumulative bar plot. 
Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) generate_non_cumulative_bar ( benchmarking_results , benchmark_generator , title = None ) Generate a non-cumulative bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. 
None Source code in src/pheval/analyse/generate_plots.py 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 def generate_non_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a non-cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_non_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Non-Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) generate_precision_recall ( benchmarking_results , benchmark_generator ) Generate and plot Precision-Recall curves for binary classification benchmark results. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. 
required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_plots.py 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 def generate_precision_recall ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Precision-Recall curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" plt . figure () for i , benchmark_result in enumerate ( benchmarking_results ): precision , recall , thresh = precision_recall_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , ) precision_recall_auc = auc ( recall , precision ) plt . plot ( recall , precision , label = f \" { self . return_benchmark_name ( benchmark_result ) } Precision-Recall Curve \" f \"(AUC = { precision_recall_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"Recall\" ) plt . ylabel ( \"Precision\" ) plt . title ( \"Precision-Recall Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _precision_recall_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) generate_roc_curve ( benchmarking_results , benchmark_generator ) Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. 
Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_plots.py 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 def generate_roc_curve ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" for i , benchmark_result in enumerate ( benchmarking_results ): fpr , tpr , thresh = roc_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , pos_label = 1 , ) roc_auc = auc ( fpr , tpr ) plt . plot ( fpr , tpr , label = f \" { self . return_benchmark_name ( benchmark_result ) } ROC Curve (AUC = { roc_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"False Positive Rate\" ) plt . ylabel ( \"True Positive Rate\" ) plt . title ( \"Receiver Operating Characteristic (ROC) Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _roc_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) generate_stacked_bar_plot ( benchmarking_results , benchmark_generator , title = None ) Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. 
Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 def generate_stacked_bar_plot ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_stacked_bar_plot_data ( benchmark_result ) self . _generate_stats_mrr_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) stats_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , stacked = True , color = self . palette_hex_codes , ylabel = benchmark_generator . y_label , edgecolor = \"white\" , ) . legend ( loc = \"center left\" , bbox_to_anchor = ( 1.0 , 0.5 )) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 100 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) mrr_df = pd . DataFrame ( self . 
mrr ) mrr_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , color = self . palette_hex_codes , ylabel = f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } mean reciprocal rank\" , legend = False , edgecolor = \"white\" , ) plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } results - mean reciprocal rank\" ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _mrr.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) return_benchmark_name ( benchmark_result ) Return the benchmark name for a run. Parameters: Name Type Description Default benchmark_result BenchmarkRunResults The benchmarking results for a run. required Returns: Name Type Description str str The benchmark name obtained from the given BenchmarkRunResults instance. Source code in src/pheval/analyse/generate_plots.py 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 def return_benchmark_name ( self , benchmark_result : BenchmarkRunResults ) -> str : \"\"\" Return the benchmark name for a run. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. Returns: str: The benchmark name obtained from the given BenchmarkRunResults instance. \"\"\" return ( benchmark_result . benchmark_name if benchmark_result . results_dir is None else self . _create_run_identifier ( benchmark_result . results_dir ) ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , title = None , generate_from_tsv = False ) Generate summary statistics bar plots for prioritisation. This method generates summary statistics bar plots based on the provided benchmarking results and plot type. Parameters: Name Type Description Default benchmarking_results list [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required plot_type str Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). required title str Title for the generated plot. Defaults to None. None generate_from_tsv bool Specify whether to generate plots from the TSV file. Defaults to False. False Source code in src/pheval/analyse/generate_plots.py 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 def generate_plots ( benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , plot_type : str , title : str = None , generate_from_tsv : bool = False , ) -> None : \"\"\" Generate summary statistics bar plots for prioritisation. This method generates summary statistics bar plots based on the provided benchmarking results and plot type. Args: benchmarking_results (list[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. plot_type (str): Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). title (str, optional): Title for the generated plot. Defaults to None. generate_from_tsv (bool): Specify whether to generate plots from the TSV file. Defaults to False. \"\"\" plot_generator = PlotGenerator () if not generate_from_tsv : plot_generator . generate_roc_curve ( benchmarking_results , benchmark_generator ) plot_generator . generate_precision_recall ( benchmarking_results , benchmark_generator ) if plot_type == \"bar_stacked\" : plot_generator . generate_stacked_bar_plot ( benchmarking_results , benchmark_generator , title ) elif plot_type == \"bar_cumulative\" : plot_generator . generate_cumulative_bar ( benchmarking_results , benchmark_generator , title ) elif plot_type == \"bar_non_cumulative\" : plot_generator . 
generate_non_cumulative_bar ( benchmarking_results , benchmark_generator , title ) generate_plots_from_benchmark_summary_tsv ( benchmark_summary_tsv , gene_analysis , variant_analysis , disease_analysis , plot_type , title ) Generate bar plot from summary benchmark results. Reads a summary of benchmark results from a TSV file and generates a bar plot based on the analysis type and plot type. Parameters: Name Type Description Default benchmark_summary_tsv Path Path to the summary TSV file containing benchmark results. required gene_analysis bool Flag indicating whether to analyse gene prioritisation. required variant_analysis bool Flag indicating whether to analyse variant prioritisation. required disease_analysis bool Flag indicating whether to analyse disease prioritisation. required plot_type str Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). required title str Title for the generated plot. required Raises: ValueError: If an unsupported plot type is specified. Source code in src/pheval/analyse/generate_plots.py 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 def generate_plots_from_benchmark_summary_tsv ( benchmark_summary_tsv : Path , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , title : str , ): \"\"\" Generate bar plot from summary benchmark results. Reads a summary of benchmark results from a TSV file and generates a bar plot based on the analysis type and plot type. Args: benchmark_summary_tsv (Path): Path to the summary TSV file containing benchmark results. gene_analysis (bool): Flag indicating whether to analyse gene prioritisation. variant_analysis (bool): Flag indicating whether to analyse variant prioritisation. disease_analysis (bool): Flag indicating whether to analyse disease prioritisation. 
plot_type (str): Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). title (str): Title for the generated plot. Raises: ValueError: If an unsupported plot type is specified. \"\"\" benchmark_stats_summary = read_benchmark_tsv_result_summary ( benchmark_summary_tsv ) benchmarking_results = parse_benchmark_result_summary ( benchmark_stats_summary ) if gene_analysis : benchmark_generator = GeneBenchmarkRunOutputGenerator () elif variant_analysis : benchmark_generator = VariantBenchmarkRunOutputGenerator () elif disease_analysis : benchmark_generator = DiseaseBenchmarkRunOutputGenerator () else : raise ValueError ( \"Specify one analysis type (gene_analysis, variant_analysis, or disease_analysis)\" ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , title , True ) trim_corpus_results_directory_suffix ( corpus_results_directory ) Trim the suffix from the corpus results directory name. Parameters: Name Type Description Default corpus_results_directory Path The directory path containing corpus results. required Returns: Name Type Description Path Path The Path object with the suffix removed from the directory name. Source code in src/pheval/analyse/generate_plots.py 24 25 26 27 28 29 30 31 32 33 34 def trim_corpus_results_directory_suffix ( corpus_results_directory : Path ) -> Path : \"\"\" Trim the suffix from the corpus results directory name. Args: corpus_results_directory (Path): The directory path containing corpus results. Returns: Path: The Path object with the suffix removed from the directory name. \"\"\" return Path ( str ( corpus_results_directory ) . replace ( PHEVAL_RESULTS_DIRECTORY_SUFFIX , \"\" ))","title":"Generate plots"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator","text":"Class to generate plots. 
Source code in src/pheval/analyse/generate_plots.py 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 class PlotGenerator : \"\"\"Class to generate plots.\"\"\" palette_hex_codes = [ \"#f4ae3d\" , \"#ee5825\" , \"#2b7288\" , \"#9a84b2\" , \"#0c604c\" , \"#c94c4c\" , \"#3d8e83\" , \"#725ac1\" , \"#e7ba52\" , \"#1b9e77\" , ] def __init__ ( self , 
): \"\"\" Initialise the PlotGenerator class. Note: `self.stats` will be used to store statistics data. `self.mrr` will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. \"\"\" self . stats , self . mrr = [], [] matplotlib . rcParams [ \"axes.spines.right\" ] = False matplotlib . rcParams [ \"axes.spines.top\" ] = False @staticmethod def _create_run_identifier ( results_dir : Path ) -> str : \"\"\" Create a run identifier from a path. Args: results_dir (Path): The directory path for results. Returns: str: A string representing the run identifier created from the given path. \"\"\" return f \" { Path ( results_dir ) . parents [ 0 ] . name } _ { trim_corpus_results_directory_suffix ( Path ( results_dir ) . name ) } \" def return_benchmark_name ( self , benchmark_result : BenchmarkRunResults ) -> str : \"\"\" Return the benchmark name for a run. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. Returns: str: The benchmark name obtained from the given BenchmarkRunResults instance. \"\"\" return ( benchmark_result . benchmark_name if benchmark_result . results_dir is None else self . _create_run_identifier ( benchmark_result . results_dir ) ) def _generate_stacked_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> None : \"\"\" Generate data in the correct format for dataframe creation for a stacked bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats self . stats . append ( { \"Run\" : self . return_benchmark_name ( benchmark_result ), \"Top\" : benchmark_result . rank_stats . percentage_top (), \"2-3\" : rank_stats . percentage_difference ( rank_stats . percentage_top3 (), rank_stats . percentage_top () ), \"4-5\" : rank_stats . percentage_difference ( rank_stats . percentage_top5 (), rank_stats . 
percentage_top3 () ), \"6-10\" : rank_stats . percentage_difference ( rank_stats . percentage_top10 (), rank_stats . percentage_top5 () ), \">10\" : rank_stats . percentage_difference ( rank_stats . percentage_found (), rank_stats . percentage_top10 () ), \"Missed\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found ()), } ) def _generate_stats_mrr_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> None : \"\"\" Generate data in the correct format for dataframe creation for MRR (Mean Reciprocal Rank) bar plot, appending to the self.mrr attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" self . mrr . extend ( [ { \"Rank\" : \"MRR\" , \"Percentage\" : benchmark_result . rank_stats . return_mean_reciprocal_rank (), \"Run\" : self . return_benchmark_name ( benchmark_result ), } ] ) def generate_stacked_bar_plot ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_stacked_bar_plot_data ( benchmark_result ) self . _generate_stats_mrr_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) stats_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , stacked = True , color = self . palette_hex_codes , ylabel = benchmark_generator . y_label , edgecolor = \"white\" , ) . legend ( loc = \"center left\" , bbox_to_anchor = ( 1.0 , 0.5 )) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . 
capitalize () } Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 100 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) mrr_df = pd . DataFrame ( self . mrr ) mrr_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , color = self . palette_hex_codes , ylabel = f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } mean reciprocal rank\" , legend = False , edgecolor = \"white\" , ) plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } results - mean reciprocal rank\" ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _mrr.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def _generate_cumulative_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ): \"\"\" Generate data in the correct format for dataframe creation for a cumulative bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats run_identifier = self . return_benchmark_name ( benchmark_result ) self . stats . extend ( [ { \"Rank\" : \"Top\" , \"Percentage\" : rank_stats . percentage_top () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top3\" , \"Percentage\" : rank_stats . percentage_top3 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top5\" , \"Percentage\" : rank_stats . percentage_top5 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Top10\" , \"Percentage\" : rank_stats . percentage_top10 () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Found\" , \"Percentage\" : rank_stats . percentage_found () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Missed\" , \"Percentage\" : rank_stats . percentage_difference ( 100 , rank_stats . 
percentage_found () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"MRR\" , \"Percentage\" : rank_stats . return_mean_reciprocal_rank (), \"Run\" : run_identifier , }, ] ) def generate_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def _generate_non_cumulative_bar_plot_data ( self , benchmark_result : BenchmarkRunResults ) -> [ dict ]: \"\"\" Generate data in the correct format for dataframe creation for a non-cumulative bar plot, appending to the self.stats attribute of the class. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. \"\"\" rank_stats = benchmark_result . rank_stats run_identifier = self . 
return_benchmark_name ( benchmark_result ) self . stats . extend ( [ { \"Rank\" : \"Top\" , \"Percentage\" : rank_stats . percentage_top () / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"2-3\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top3 (), rank_stats . percentage_top () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"4-5\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top5 (), rank_stats . percentage_top3 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"6-10\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_top10 (), rank_stats . percentage_top5 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \">10\" , \"Percentage\" : rank_stats . percentage_difference ( rank_stats . percentage_found (), rank_stats . percentage_top10 () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"Missed\" , \"Percentage\" : rank_stats . percentage_difference ( 100 , rank_stats . percentage_found () ) / 100 , \"Run\" : run_identifier , }, { \"Rank\" : \"MRR\" , \"Percentage\" : rank_stats . return_mean_reciprocal_rank (), \"Run\" : run_identifier , }, ] ) def generate_roc_curve ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" for i , benchmark_result in enumerate ( benchmarking_results ): fpr , tpr , thresh = roc_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , pos_label = 1 , ) roc_auc = auc ( fpr , tpr ) plt . plot ( fpr , tpr , label = f \" { self . 
return_benchmark_name ( benchmark_result ) } ROC Curve (AUC = { roc_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"False Positive Rate\" ) plt . ylabel ( \"True Positive Rate\" ) plt . title ( \"Receiver Operating Characteristic (ROC) Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _roc_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def generate_precision_recall ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Precision-Recall curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" plt . figure () for i , benchmark_result in enumerate ( benchmarking_results ): precision , recall , thresh = precision_recall_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , ) precision_recall_auc = auc ( recall , precision ) plt . plot ( recall , precision , label = f \" { self . return_benchmark_name ( benchmark_result ) } Precision-Recall Curve \" f \"(AUC = { precision_recall_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"Recall\" ) plt . ylabel ( \"Precision\" ) plt . title ( \"Precision-Recall Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . 
prioritisation_type_file_prefix } _precision_recall_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) def generate_non_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a non-cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_non_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Non-Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"PlotGenerator"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.__init__","text":"Initialise the PlotGenerator class. Note: self.stats will be used to store statistics data. self.mrr will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. 
Source code in src/pheval/analyse/generate_plots.py 53 54 55 56 57 58 59 60 61 62 63 64 65 66 def __init__ ( self , ): \"\"\" Initialise the PlotGenerator class. Note: `self.stats` will be used to store statistics data. `self.mrr` will store Mean Reciprocal Rank (MRR) values. Matplotlib settings are configured to remove the right and top axes spines for generated plots. \"\"\" self . stats , self . mrr = [], [] matplotlib . rcParams [ \"axes.spines.right\" ] = False matplotlib . rcParams [ \"axes.spines.top\" ] = False","title":"__init__"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_cumulative_bar","text":"Generate a cumulative bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 def generate_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . 
catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_cumulative_bar"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_non_cumulative_bar","text":"Generate a non-cumulative bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 def generate_non_cumulative_bar ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a non-cumulative bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. 
\"\"\" for benchmark_result in benchmarking_results : self . _generate_non_cumulative_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) sns . catplot ( data = stats_df , kind = \"bar\" , x = \"Rank\" , y = \"Percentage\" , hue = \"Run\" , palette = self . palette_hex_codes , edgecolor = \"white\" , legend = False , ) . set ( xlabel = \"Rank\" , ylabel = benchmark_generator . y_label ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 ), ncol = 3 , title = \"Run\" ) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Non-Cumulative Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_non_cumulative_bar"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_precision_recall","text":"Generate and plot Precision-Recall curves for binary classification benchmark results. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_plots.py 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 def generate_precision_recall ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Precision-Recall curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. 
benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" plt . figure () for i , benchmark_result in enumerate ( benchmarking_results ): precision , recall , thresh = precision_recall_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , ) precision_recall_auc = auc ( recall , precision ) plt . plot ( recall , precision , label = f \" { self . return_benchmark_name ( benchmark_result ) } Precision-Recall Curve \" f \"(AUC = { precision_recall_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"Recall\" ) plt . ylabel ( \"Precision\" ) plt . title ( \"Precision-Recall Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _precision_recall_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_precision_recall"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_roc_curve","text":"Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required Source code in src/pheval/analyse/generate_plots.py 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 def generate_roc_curve ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , ): \"\"\" Generate and plot Receiver Operating Characteristic (ROC) curves for binary classification benchmark results. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" for i , benchmark_result in enumerate ( benchmarking_results ): fpr , tpr , thresh = roc_curve ( benchmark_result . binary_classification_stats . labels , benchmark_result . binary_classification_stats . scores , pos_label = 1 , ) roc_auc = auc ( fpr , tpr ) plt . plot ( fpr , tpr , label = f \" { self . return_benchmark_name ( benchmark_result ) } ROC Curve (AUC = { roc_auc : .2f } )\" , color = self . palette_hex_codes [ i ], ) plt . plot ( linestyle = \"--\" , color = \"gray\" ) plt . xlabel ( \"False Positive Rate\" ) plt . ylabel ( \"True Positive Rate\" ) plt . title ( \"Receiver Operating Characteristic (ROC) Curve\" ) plt . legend ( loc = \"upper center\" , bbox_to_anchor = ( 0.5 , - 0.15 )) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _roc_curve.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_roc_curve"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.generate_stacked_bar_plot","text":"Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] List of benchmarking results for multiple runs. 
required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required title str Title for the generated plot. Defaults to None. None Source code in src/pheval/analyse/generate_plots.py 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 def generate_stacked_bar_plot ( self , benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , title : str = None , ) -> None : \"\"\" Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot. Args: benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. title (str, optional): Title for the generated plot. Defaults to None. \"\"\" for benchmark_result in benchmarking_results : self . _generate_stacked_bar_plot_data ( benchmark_result ) self . _generate_stats_mrr_bar_plot_data ( benchmark_result ) stats_df = pd . DataFrame ( self . stats ) stats_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , stacked = True , color = self . palette_hex_codes , ylabel = benchmark_generator . y_label , edgecolor = \"white\" , ) . legend ( loc = \"center left\" , bbox_to_anchor = ( 1.0 , 0.5 )) if title is None : plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } Rank Stats\" ) else : plt . title ( title , loc = \"center\" , fontsize = 15 ) plt . ylim ( 0 , 100 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _rank_stats.svg\" , format = \"svg\" , bbox_inches = \"tight\" , ) mrr_df = pd . DataFrame ( self . mrr ) mrr_df . set_index ( \"Run\" ) . plot ( kind = \"bar\" , color = self . palette_hex_codes , ylabel = f \" { benchmark_generator . 
prioritisation_type_file_prefix . capitalize () } mean reciprocal rank\" , legend = False , edgecolor = \"white\" , ) plt . title ( f \" { benchmark_generator . prioritisation_type_file_prefix . capitalize () } results - mean reciprocal rank\" ) plt . ylim ( 0 , 1 ) plt . savefig ( f \" { benchmark_generator . prioritisation_type_file_prefix } _mrr.svg\" , format = \"svg\" , bbox_inches = \"tight\" , )","title":"generate_stacked_bar_plot"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.PlotGenerator.return_benchmark_name","text":"Return the benchmark name for a run. Parameters: Name Type Description Default benchmark_result BenchmarkRunResults The benchmarking results for a run. required Returns: Name Type Description str str The benchmark name obtained from the given BenchmarkRunResults instance. Source code in src/pheval/analyse/generate_plots.py 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 def return_benchmark_name ( self , benchmark_result : BenchmarkRunResults ) -> str : \"\"\" Return the benchmark name for a run. Args: benchmark_result (BenchmarkRunResults): The benchmarking results for a run. Returns: str: The benchmark name obtained from the given BenchmarkRunResults instance. \"\"\" return ( benchmark_result . benchmark_name if benchmark_result . results_dir is None else self . _create_run_identifier ( benchmark_result . results_dir ) )","title":"return_benchmark_name"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.generate_plots","text":"Generate summary statistics bar plots for prioritisation. This method generates summary statistics bar plots based on the provided benchmarking results and plot type. Parameters: Name Type Description Default benchmarking_results list [ BenchmarkRunResults ] List of benchmarking results for multiple runs. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required plot_type str Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). required title str Title for the generated plot. Defaults to None. None generate_from_tsv bool Specify whether to generate plots from the TSV file. Defaults to False. False Source code in src/pheval/analyse/generate_plots.py 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 def generate_plots ( benchmarking_results : List [ BenchmarkRunResults ], benchmark_generator : BenchmarkRunOutputGenerator , plot_type : str , title : str = None , generate_from_tsv : bool = False , ) -> None : \"\"\" Generate summary statistics bar plots for prioritisation. This method generates summary statistics bar plots based on the provided benchmarking results and plot type. Args: benchmarking_results (list[BenchmarkRunResults]): List of benchmarking results for multiple runs. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. plot_type (str): Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). title (str, optional): Title for the generated plot. Defaults to None. generate_from_tsv (bool): Specify whether to generate plots from the TSV file. Defaults to False. \"\"\" plot_generator = PlotGenerator () if not generate_from_tsv : plot_generator . generate_roc_curve ( benchmarking_results , benchmark_generator ) plot_generator . generate_precision_recall ( benchmarking_results , benchmark_generator ) if plot_type == \"bar_stacked\" : plot_generator . generate_stacked_bar_plot ( benchmarking_results , benchmark_generator , title ) elif plot_type == \"bar_cumulative\" : plot_generator . generate_cumulative_bar ( benchmarking_results , benchmark_generator , title ) elif plot_type == \"bar_non_cumulative\" : plot_generator . 
generate_non_cumulative_bar ( benchmarking_results , benchmark_generator , title )","title":"generate_plots"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.generate_plots_from_benchmark_summary_tsv","text":"Generate bar plot from summary benchmark results. Reads a summary of benchmark results from a TSV file and generates a bar plot based on the analysis type and plot type. Parameters: Name Type Description Default benchmark_summary_tsv Path Path to the summary TSV file containing benchmark results. required gene_analysis bool Flag indicating whether to analyse gene prioritisation. required variant_analysis bool Flag indicating whether to analyse variant prioritisation. required disease_analysis bool Flag indicating whether to analyse disease prioritisation. required plot_type str Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). required title str Title for the generated plot. required Raises: ValueError: If an unsupported plot type is specified. Source code in src/pheval/analyse/generate_plots.py 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 def generate_plots_from_benchmark_summary_tsv ( benchmark_summary_tsv : Path , gene_analysis : bool , variant_analysis : bool , disease_analysis : bool , plot_type : str , title : str , ): \"\"\" Generate bar plot from summary benchmark results. Reads a summary of benchmark results from a TSV file and generates a bar plot based on the analysis type and plot type. Args: benchmark_summary_tsv (Path): Path to the summary TSV file containing benchmark results. gene_analysis (bool): Flag indicating whether to analyse gene prioritisation. variant_analysis (bool): Flag indicating whether to analyse variant prioritisation. disease_analysis (bool): Flag indicating whether to analyse disease prioritisation. 
plot_type (str): Type of plot to be generated (\"bar_stacked\", \"bar_cumulative\", \"bar_non_cumulative\"). title (str): Title for the generated plot. Raises: ValueError: If an unsupported plot type is specified. \"\"\" benchmark_stats_summary = read_benchmark_tsv_result_summary ( benchmark_summary_tsv ) benchmarking_results = parse_benchmark_result_summary ( benchmark_stats_summary ) if gene_analysis : benchmark_generator = GeneBenchmarkRunOutputGenerator () elif variant_analysis : benchmark_generator = VariantBenchmarkRunOutputGenerator () elif disease_analysis : benchmark_generator = DiseaseBenchmarkRunOutputGenerator () else : raise ValueError ( \"Specify one analysis type (gene_analysis, variant_analysis, or disease_analysis)\" ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , title , True )","title":"generate_plots_from_benchmark_summary_tsv"},{"location":"api/pheval/analyse/generate_plots/#src.pheval.analyse.generate_plots.trim_corpus_results_directory_suffix","text":"Trim the suffix from the corpus results directory name. Parameters: Name Type Description Default corpus_results_directory Path The directory path containing corpus results. required Returns: Name Type Description Path Path The Path object with the suffix removed from the directory name. Source code in src/pheval/analyse/generate_plots.py 24 25 26 27 28 29 30 31 32 33 34 def trim_corpus_results_directory_suffix ( corpus_results_directory : Path ) -> Path : \"\"\" Trim the suffix from the corpus results directory name. Args: corpus_results_directory (Path): The directory path containing corpus results. Returns: Path: The Path object with the suffix removed from the directory name. \"\"\" return Path ( str ( corpus_results_directory ) . 
replace ( PHEVAL_RESULTS_DIRECTORY_SUFFIX , \"\" ))","title":"trim_corpus_results_directory_suffix"},{"location":"api/pheval/analyse/generate_summary_outputs/","text":"RankComparisonGenerator Class for writing the run comparison of rank assignment for prioritisation. Source code in src/pheval/analyse/generate_summary_outputs.py 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 class RankComparisonGenerator : \"\"\"Class for writing the run comparison of rank assignment for prioritisation.\"\"\" def __init__ ( self , run_comparison : defaultdict ): \"\"\" Initialise the RankComparisonGenerator class. Args: run_comparison (defaultdict): A nested dictionary containing the run comparison data. \"\"\" self . run_comparison = run_comparison def _generate_dataframe ( self ) -> pd . DataFrame : \"\"\" Generate a Pandas DataFrame based on the run comparison data. Returns: pd.DataFrame: DataFrame containing the run comparison data. \"\"\" return pd . DataFrame . from_dict ( self . run_comparison , orient = \"index\" ) def _calculate_rank_difference ( self ) -> pd . DataFrame : \"\"\" Calculate the rank decrease for runs, taking the first directory as a baseline. Returns: pd.DataFrame: DataFrame containing the calculated rank differences. \"\"\" comparison_df = self . _generate_dataframe () comparison_df [ \"rank_change\" ] = comparison_df . iloc [:, 2 ] - comparison_df . iloc [:, 3 ] comparison_df [ \"rank_change\" ] = np . where ( ( comparison_df . iloc [:, 2 ] == 0 ) & ( comparison_df . iloc [:, 3 ] != 0 ), \"GAINED\" , np . where ( ( comparison_df . iloc [:, 3 ] == 0 ) & ( comparison_df . iloc [:, 2 ] != 0 ), \"LOST\" , comparison_df [ \"rank_change\" ], ), ) comparison_df [ \"rank_change\" ] = comparison_df [ \"rank_change\" ] . apply ( lambda x : int ( x ) if str ( x ) . lstrip ( \"-\" ) . 
isdigit () else x ) return comparison_df def generate_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file from the run comparison data. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _generate_dataframe () . to_csv ( prefix + suffix , sep = \" \\t \" ) def generate_comparison_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file with calculated rank differences. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _calculate_rank_difference () . to_csv ( prefix + suffix , sep = \" \\t \" ) __init__ ( run_comparison ) Initialise the RankComparisonGenerator class. Parameters: Name Type Description Default run_comparison defaultdict A nested dictionary containing the run comparison data. required Source code in src/pheval/analyse/generate_summary_outputs.py 18 19 20 21 22 23 24 25 def __init__ ( self , run_comparison : defaultdict ): \"\"\" Initialise the RankComparisonGenerator class. Args: run_comparison (defaultdict): A nested dictionary containing the run comparison data. \"\"\" self . run_comparison = run_comparison generate_comparison_output ( prefix , suffix ) Generate output file with calculated rank differences. Parameters: Name Type Description Default prefix str Prefix for the output file name. required suffix str Suffix for the output file name. required Source code in src/pheval/analyse/generate_summary_outputs.py 69 70 71 72 73 74 75 76 77 def generate_comparison_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file with calculated rank differences. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _calculate_rank_difference () . to_csv ( prefix + suffix , sep = \" \\t \" ) generate_output ( prefix , suffix ) Generate output file from the run comparison data. 
Parameters: Name Type Description Default prefix str Prefix for the output file name. required suffix str Suffix for the output file name. required Source code in src/pheval/analyse/generate_summary_outputs.py 59 60 61 62 63 64 65 66 67 def generate_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file from the run comparison data. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _generate_dataframe () . to_csv ( prefix + suffix , sep = \" \\t \" ) generate_benchmark_comparison_output ( benchmarking_results , plot_type , benchmark_generator ) Generate prioritisation outputs for benchmarking multiple runs. This function generates comparison outputs for benchmarking multiple runs. It compares the results between pairs of BenchmarkRunResults instances in benchmarking_results and generates rank comparison outputs using RankComparisonGenerator for each pair. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] A list containing BenchmarkRunResults instances representing the benchmarking results of multiple runs. required plot_type str The type of plot to be generated. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_summary_outputs.py 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 def generate_benchmark_comparison_output ( benchmarking_results : List [ BenchmarkRunResults ], plot_type : str , benchmark_generator : BenchmarkRunOutputGenerator , ) -> None : \"\"\" Generate prioritisation outputs for benchmarking multiple runs. This function generates comparison outputs for benchmarking multiple runs. 
It compares the results between pairs of `BenchmarkRunResults` instances in `benchmarking_results` and generates rank comparison outputs using `RankComparisonGenerator` for each pair. Args: benchmarking_results (List[BenchmarkRunResults]): A list containing BenchmarkRunResults instances representing the benchmarking results of multiple runs. plot_type (str): The type of plot to be generated. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" output_prefix = benchmark_generator . prioritisation_type_file_prefix for pair in itertools . combinations ( benchmarking_results , 2 ): result1 = pair [ 0 ] result2 = pair [ 1 ] merged_results = merge_results ( deepcopy ( result1 . ranks ), deepcopy ( result2 . ranks ), ) RankComparisonGenerator ( merged_results ) . generate_comparison_output ( f \" { result1 . results_dir . parents [ 0 ] . name } _\" f \" { result1 . results_dir . name } \" f \"_vs_ { result2 . results_dir . parents [ 0 ] . name } _\" f \" { result2 . results_dir . name } \" , f \"- { output_prefix }{ RANK_COMPARISON_FILE_SUFFIX } \" , ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , ) generate_benchmark_output ( benchmarking_results , plot_type , benchmark_generator ) Generate prioritisation outputs for a single benchmarking run. Parameters: Name Type Description Default benchmarking_results BenchmarkRunResults Results of a benchmarking run. required plot_type str Type of plot to generate. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required Source code in src/pheval/analyse/generate_summary_outputs.py 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 def generate_benchmark_output ( benchmarking_results : BenchmarkRunResults , plot_type : str , benchmark_generator : BenchmarkRunOutputGenerator , ) -> None : \"\"\" Generate prioritisation outputs for a single benchmarking run. Args: benchmarking_results (BenchmarkRunResults): Results of a benchmarking run. plot_type (str): Type of plot to generate. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" rank_comparison_data = benchmarking_results . ranks results_dir_name = benchmarking_results . results_dir . name RankComparisonGenerator ( rank_comparison_data ) . generate_output ( f \" { results_dir_name } \" , f \"- { benchmark_generator . prioritisation_type_file_prefix }{ RANK_COMPARISON_FILE_SUFFIX } \" , ) generate_plots ( [ benchmarking_results ], benchmark_generator , plot_type , ) merge_results ( result1 , result2 ) Merge two nested dictionaries containing results on commonalities. This function merges two dictionaries, result1 and result2 , containing nested structures. It traverses the dictionaries recursively and merges their contents based on common keys. If a key is present in both dictionaries and points to another dictionary, the function will further merge their nested contents. If a key exists in result2 but not in result1 , it will be added to result1 . Parameters: Name Type Description Default result1 dict The first dictionary to be merged. required result2 dict The second dictionary to be merged. required Returns: Name Type Description defaultdict defaultdict The merged dictionary containing the combined contents of result1 and result2 . 
Source code in src/pheval/analyse/generate_summary_outputs.py 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 def merge_results ( result1 : dict , result2 : dict ) -> defaultdict : \"\"\" Merge two nested dictionaries containing results on commonalities. This function merges two dictionaries, `result1` and `result2`, containing nested structures. It traverses the dictionaries recursively and merges their contents based on common keys. If a key is present in both dictionaries and points to another dictionary, the function will further merge their nested contents. If a key exists in `result2` but not in `result1`, it will be added to `result1`. Args: result1 (dict): The first dictionary to be merged. result2 (dict): The second dictionary to be merged. Returns: defaultdict: The merged dictionary containing the combined contents of `result1` and `result2`. \"\"\" for key , val in result1 . items (): if type ( val ) == dict : if key in result2 and type ( result2 [ key ] == dict ): merge_results ( result1 [ key ], result2 [ key ]) else : if key in result2 : result1 [ key ] = result2 [ key ] for key , val in result2 . items (): if key not in result1 : result1 [ key ] = val return result1","title":"Generate summary outputs"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator","text":"Class for writing the run comparison of rank assignment for prioritisation. Source code in src/pheval/analyse/generate_summary_outputs.py 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 class RankComparisonGenerator : \"\"\"Class for writing the run comparison of rank assignment for prioritisation.\"\"\" def __init__ ( self , run_comparison : defaultdict ): \"\"\" Initialise the RankComparisonGenerator class. 
Args: run_comparison (defaultdict): A nested dictionary containing the run comparison data. \"\"\" self . run_comparison = run_comparison def _generate_dataframe ( self ) -> pd . DataFrame : \"\"\" Generate a Pandas DataFrame based on the run comparison data. Returns: pd.DataFrame: DataFrame containing the run comparison data. \"\"\" return pd . DataFrame . from_dict ( self . run_comparison , orient = \"index\" ) def _calculate_rank_difference ( self ) -> pd . DataFrame : \"\"\" Calculate the rank decrease for runs, taking the first directory as a baseline. Returns: pd.DataFrame: DataFrame containing the calculated rank differences. \"\"\" comparison_df = self . _generate_dataframe () comparison_df [ \"rank_change\" ] = comparison_df . iloc [:, 2 ] - comparison_df . iloc [:, 3 ] comparison_df [ \"rank_change\" ] = np . where ( ( comparison_df . iloc [:, 2 ] == 0 ) & ( comparison_df . iloc [:, 3 ] != 0 ), \"GAINED\" , np . where ( ( comparison_df . iloc [:, 3 ] == 0 ) & ( comparison_df . iloc [:, 2 ] != 0 ), \"LOST\" , comparison_df [ \"rank_change\" ], ), ) comparison_df [ \"rank_change\" ] = comparison_df [ \"rank_change\" ] . apply ( lambda x : int ( x ) if str ( x ) . lstrip ( \"-\" ) . isdigit () else x ) return comparison_df def generate_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file from the run comparison data. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _generate_dataframe () . to_csv ( prefix + suffix , sep = \" \\t \" ) def generate_comparison_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file with calculated rank differences. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _calculate_rank_difference () . 
to_csv ( prefix + suffix , sep = \" \\t \" )","title":"RankComparisonGenerator"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.__init__","text":"Initialise the RankComparisonGenerator class. Parameters: Name Type Description Default run_comparison defaultdict A nested dictionary containing the run comparison data. required Source code in src/pheval/analyse/generate_summary_outputs.py 18 19 20 21 22 23 24 25 def __init__ ( self , run_comparison : defaultdict ): \"\"\" Initialise the RankComparisonGenerator class. Args: run_comparison (defaultdict): A nested dictionary containing the run comparison data. \"\"\" self . run_comparison = run_comparison","title":"__init__"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_comparison_output","text":"Generate output file with calculated rank differences. Parameters: Name Type Description Default prefix str Prefix for the output file name. required suffix str Suffix for the output file name. required Source code in src/pheval/analyse/generate_summary_outputs.py 69 70 71 72 73 74 75 76 77 def generate_comparison_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file with calculated rank differences. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _calculate_rank_difference () . to_csv ( prefix + suffix , sep = \" \\t \" )","title":"generate_comparison_output"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.RankComparisonGenerator.generate_output","text":"Generate output file from the run comparison data. Parameters: Name Type Description Default prefix str Prefix for the output file name. required suffix str Suffix for the output file name. 
required Source code in src/pheval/analyse/generate_summary_outputs.py 59 60 61 62 63 64 65 66 67 def generate_output ( self , prefix : str , suffix : str ) -> None : \"\"\" Generate output file from the run comparison data. Args: prefix (str): Prefix for the output file name. suffix (str): Suffix for the output file name. \"\"\" self . _generate_dataframe () . to_csv ( prefix + suffix , sep = \" \\t \" )","title":"generate_output"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.generate_benchmark_comparison_output","text":"Generate prioritisation outputs for benchmarking multiple runs. This function generates comparison outputs for benchmarking multiple runs. It compares the results between pairs of BenchmarkRunResults instances in benchmarking_results and generates rank comparison outputs using RankComparisonGenerator for each pair. Parameters: Name Type Description Default benchmarking_results List [ BenchmarkRunResults ] A list containing BenchmarkRunResults instances representing the benchmarking results of multiple runs. required plot_type str The type of plot to be generated. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. required Source code in src/pheval/analyse/generate_summary_outputs.py 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 def generate_benchmark_comparison_output ( benchmarking_results : List [ BenchmarkRunResults ], plot_type : str , benchmark_generator : BenchmarkRunOutputGenerator , ) -> None : \"\"\" Generate prioritisation outputs for benchmarking multiple runs. This function generates comparison outputs for benchmarking multiple runs. 
It compares the results between pairs of `BenchmarkRunResults` instances in `benchmarking_results` and generates rank comparison outputs using `RankComparisonGenerator` for each pair. Args: benchmarking_results (List[BenchmarkRunResults]): A list containing BenchmarkRunResults instances representing the benchmarking results of multiple runs. plot_type (str): The type of plot to be generated. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" output_prefix = benchmark_generator . prioritisation_type_file_prefix for pair in itertools . combinations ( benchmarking_results , 2 ): result1 = pair [ 0 ] result2 = pair [ 1 ] merged_results = merge_results ( deepcopy ( result1 . ranks ), deepcopy ( result2 . ranks ), ) RankComparisonGenerator ( merged_results ) . generate_comparison_output ( f \" { result1 . results_dir . parents [ 0 ] . name } _\" f \" { result1 . results_dir . name } \" f \"_vs_ { result2 . results_dir . parents [ 0 ] . name } _\" f \" { result2 . results_dir . name } \" , f \"- { output_prefix }{ RANK_COMPARISON_FILE_SUFFIX } \" , ) generate_plots ( benchmarking_results , benchmark_generator , plot_type , )","title":"generate_benchmark_comparison_output"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.generate_benchmark_output","text":"Generate prioritisation outputs for a single benchmarking run. Parameters: Name Type Description Default benchmarking_results BenchmarkRunResults Results of a benchmarking run. required plot_type str Type of plot to generate. required benchmark_generator BenchmarkRunOutputGenerator Object containing benchmarking output generation details. 
required Source code in src/pheval/analyse/generate_summary_outputs.py 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 def generate_benchmark_output ( benchmarking_results : BenchmarkRunResults , plot_type : str , benchmark_generator : BenchmarkRunOutputGenerator , ) -> None : \"\"\" Generate prioritisation outputs for a single benchmarking run. Args: benchmarking_results (BenchmarkRunResults): Results of a benchmarking run. plot_type (str): Type of plot to generate. benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details. \"\"\" rank_comparison_data = benchmarking_results . ranks results_dir_name = benchmarking_results . results_dir . name RankComparisonGenerator ( rank_comparison_data ) . generate_output ( f \" { results_dir_name } \" , f \"- { benchmark_generator . prioritisation_type_file_prefix }{ RANK_COMPARISON_FILE_SUFFIX } \" , ) generate_plots ( [ benchmarking_results ], benchmark_generator , plot_type , )","title":"generate_benchmark_output"},{"location":"api/pheval/analyse/generate_summary_outputs/#src.pheval.analyse.generate_summary_outputs.merge_results","text":"Merge two nested dictionaries containing results on commonalities. This function merges two dictionaries, result1 and result2 , containing nested structures. It traverses the dictionaries recursively and merges their contents based on common keys. If a key is present in both dictionaries and points to another dictionary, the function will further merge their nested contents. If a key exists in result2 but not in result1 , it will be added to result1 . Parameters: Name Type Description Default result1 dict The first dictionary to be merged. required result2 dict The second dictionary to be merged. required Returns: Name Type Description defaultdict defaultdict The merged dictionary containing the combined contents of result1 and result2 . 
Source code in src/pheval/analyse/generate_summary_outputs.py 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 def merge_results ( result1 : dict , result2 : dict ) -> defaultdict : \"\"\" Merge two nested dictionaries containing results on commonalities. This function merges two dictionaries, `result1` and `result2`, containing nested structures. It traverses the dictionaries recursively and merges their contents based on common keys. If a key is present in both dictionaries and points to another dictionary, the function will further merge their nested contents. If a key exists in `result2` but not in `result1`, it will be added to `result1`. Args: result1 (dict): The first dictionary to be merged. result2 (dict): The second dictionary to be merged. Returns: defaultdict: The merged dictionary containing the combined contents of `result1` and `result2`. \"\"\" for key , val in result1 . items (): if type ( val ) == dict : if key in result2 and type ( result2 [ key ] == dict ): merge_results ( result1 [ key ], result2 [ key ]) else : if key in result2 : result1 [ key ] = result2 [ key ] for key , val in result2 . items (): if key not in result1 : result1 [ key ] = val return result1","title":"merge_results"},{"location":"api/pheval/analyse/parse_benchmark_summary/","text":"parse_benchmark_result_summary ( benchmarking_df ) Parse the summary benchmark DataFrame into a list of BenchmarkRunResults. Parameters: Name Type Description Default benchmarking_df DataFrame Summary benchmark DataFrame containing columns such as 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. required Returns: Type Description List [ BenchmarkRunResults ] List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame. 
Source code in src/pheval/analyse/parse_benchmark_summary.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 def parse_benchmark_result_summary ( benchmarking_df : pd . DataFrame ) -> List [ BenchmarkRunResults ]: \"\"\" Parse the summary benchmark DataFrame into a list of BenchmarkRunResults. Args: benchmarking_df (pd.DataFrame): Summary benchmark DataFrame containing columns such as 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. Returns: List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame. \"\"\" benchmarking_results = [] for _ , row in benchmarking_df . iterrows (): benchmarking_result = BenchmarkRunResults ( rank_stats = RankStats ( top = row [ \"top\" ], top3 = row [ \"top3\" ], top5 = row [ \"top5\" ], top10 = row [ \"top10\" ], found = row [ \"found\" ], total = row [ \"total\" ], mrr = row [ \"mean_reciprocal_rank\" ], ), ranks = {}, benchmark_name = row [ \"results_directory_path\" ], binary_classification_stats = BinaryClassificationStats (), ) benchmarking_results . append ( benchmarking_result ) return benchmarking_results read_benchmark_tsv_result_summary ( benchmarking_tsv ) Read the summary benchmark TSV output generated from the benchmark-comparison command. Parameters: Name Type Description Default benchmarking_tsv Path Path to the summary benchmark TSV output file. required Returns: Type Description DataFrame pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including: 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. Source code in src/pheval/analyse/parse_benchmark_summary.py 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 def read_benchmark_tsv_result_summary ( benchmarking_tsv : Path ) -> pd . DataFrame : \"\"\" Read the summary benchmark TSV output generated from the benchmark-comparison command. 
Args: benchmarking_tsv (Path): Path to the summary benchmark TSV output file. Returns: pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including: 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. \"\"\" return pd . read_csv ( benchmarking_tsv , delimiter = \" \\t \" , usecols = [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , ], )","title":"Parse benchmark summary"},{"location":"api/pheval/analyse/parse_benchmark_summary/#src.pheval.analyse.parse_benchmark_summary.parse_benchmark_result_summary","text":"Parse the summary benchmark DataFrame into a list of BenchmarkRunResults. Parameters: Name Type Description Default benchmarking_df DataFrame Summary benchmark DataFrame containing columns such as 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. required Returns: Type Description List [ BenchmarkRunResults ] List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame. Source code in src/pheval/analyse/parse_benchmark_summary.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 def parse_benchmark_result_summary ( benchmarking_df : pd . DataFrame ) -> List [ BenchmarkRunResults ]: \"\"\" Parse the summary benchmark DataFrame into a list of BenchmarkRunResults. Args: benchmarking_df (pd.DataFrame): Summary benchmark DataFrame containing columns such as 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. Returns: List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame. \"\"\" benchmarking_results = [] for _ , row in benchmarking_df . 
iterrows (): benchmarking_result = BenchmarkRunResults ( rank_stats = RankStats ( top = row [ \"top\" ], top3 = row [ \"top3\" ], top5 = row [ \"top5\" ], top10 = row [ \"top10\" ], found = row [ \"found\" ], total = row [ \"total\" ], mrr = row [ \"mean_reciprocal_rank\" ], ), ranks = {}, benchmark_name = row [ \"results_directory_path\" ], binary_classification_stats = BinaryClassificationStats (), ) benchmarking_results . append ( benchmarking_result ) return benchmarking_results","title":"parse_benchmark_result_summary"},{"location":"api/pheval/analyse/parse_benchmark_summary/#src.pheval.analyse.parse_benchmark_summary.read_benchmark_tsv_result_summary","text":"Read the summary benchmark TSV output generated from the benchmark-comparison command. Parameters: Name Type Description Default benchmarking_tsv Path Path to the summary benchmark TSV output file. required Returns: Type Description DataFrame pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including: 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. Source code in src/pheval/analyse/parse_benchmark_summary.py 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 def read_benchmark_tsv_result_summary ( benchmarking_tsv : Path ) -> pd . DataFrame : \"\"\" Read the summary benchmark TSV output generated from the benchmark-comparison command. Args: benchmarking_tsv (Path): Path to the summary benchmark TSV output file. Returns: pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including: 'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found', 'total', 'mean_reciprocal_rank'. \"\"\" return pd . 
read_csv ( benchmarking_tsv , delimiter = \" \\t \" , usecols = [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , ], )","title":"read_benchmark_tsv_result_summary"},{"location":"api/pheval/analyse/parse_pheval_result/","text":"parse_pheval_result ( data_class_type , pheval_result ) Parse PhEval result into specified dataclass type. Parameters: Name Type Description Default data_class_type PhEvalResult The data class type to parse the result into. required pheval_result List [ dict ] A list of dictionaries representing the PhEval result. required Returns: Type Description List [ PhEvalResult ] List[PhEvalResult]: A list of instances of the specified data class type, List [ PhEvalResult ] each instance representing a row in the PhEval result. Source code in src/pheval/analyse/parse_pheval_result.py 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 def parse_pheval_result ( data_class_type : PhEvalResult , pheval_result : List [ dict ] ) -> List [ PhEvalResult ]: \"\"\" Parse PhEval result into specified dataclass type. Args: data_class_type (PhEvalResult): The data class type to parse the result into. pheval_result (List[dict]): A list of dictionaries representing the PhEval result. Returns: List[PhEvalResult]: A list of instances of the specified data class type, each instance representing a row in the PhEval result. \"\"\" return [ data_class_type ( ** row ) for row in pheval_result ] read_standardised_result ( standardised_result_path ) Read the standardised result output and return a list of dictionaries. Parameters: Name Type Description Default standardised_result_path Path The path to the file containing the standardised result output. required Returns: Type Description List [ dict ] List[dict]: A list of dictionaries representing the content of the standardised result file. 
Source code in src/pheval/analyse/parse_pheval_result.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 def read_standardised_result ( standardised_result_path : Path ) -> List [ dict ]: \"\"\" Read the standardised result output and return a list of dictionaries. Args: standardised_result_path (Path): The path to the file containing the standardised result output. Returns: List[dict]: A list of dictionaries representing the content of the standardised result file. \"\"\" if standardised_result_path . is_file (): return pd . read_csv ( standardised_result_path , delimiter = \" \\t \" ) . to_dict ( \"records\" ) else : info_log . info ( f \"Could not find { standardised_result_path } \" ) return pd . DataFrame () . to_dict ( \"records\" )","title":"Parse pheval result"},{"location":"api/pheval/analyse/parse_pheval_result/#src.pheval.analyse.parse_pheval_result.parse_pheval_result","text":"Parse PhEval result into specified dataclass type. Parameters: Name Type Description Default data_class_type PhEvalResult The data class type to parse the result into. required pheval_result List [ dict ] A list of dictionaries representing the PhEval result. required Returns: Type Description List [ PhEvalResult ] List[PhEvalResult]: A list of instances of the specified data class type, List [ PhEvalResult ] each instance representing a row in the PhEval result. Source code in src/pheval/analyse/parse_pheval_result.py 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 def parse_pheval_result ( data_class_type : PhEvalResult , pheval_result : List [ dict ] ) -> List [ PhEvalResult ]: \"\"\" Parse PhEval result into specified dataclass type. Args: data_class_type (PhEvalResult): The data class type to parse the result into. pheval_result (List[dict]): A list of dictionaries representing the PhEval result. Returns: List[PhEvalResult]: A list of instances of the specified data class type, each instance representing a row in the PhEval result. 
\"\"\" return [ data_class_type ( ** row ) for row in pheval_result ]","title":"parse_pheval_result"},{"location":"api/pheval/analyse/parse_pheval_result/#src.pheval.analyse.parse_pheval_result.read_standardised_result","text":"Read the standardised result output and return a list of dictionaries. Parameters: Name Type Description Default standardised_result_path Path The path to the file containing the standardised result output. required Returns: Type Description List [ dict ] List[dict]: A list of dictionaries representing the content of the standardised result file. Source code in src/pheval/analyse/parse_pheval_result.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 def read_standardised_result ( standardised_result_path : Path ) -> List [ dict ]: \"\"\" Read the standardised result output and return a list of dictionaries. Args: standardised_result_path (Path): The path to the file containing the standardised result output. Returns: List[dict]: A list of dictionaries representing the content of the standardised result file. \"\"\" if standardised_result_path . is_file (): return pd . read_csv ( standardised_result_path , delimiter = \" \\t \" ) . to_dict ( \"records\" ) else : info_log . info ( f \"Could not find { standardised_result_path } \" ) return pd . DataFrame () . to_dict ( \"records\" )","title":"read_standardised_result"},{"location":"api/pheval/analyse/prioritisation_rank_recorder/","text":"PrioritisationRankRecorder dataclass Record ranks for different types of prioritisation results. Attributes: Name Type Description index int The index representing the run. directory Path The result directory path. run_comparison defaultdict The comparison dictionary to record ranks. 
Source code in src/pheval/analyse/prioritisation_rank_recorder.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 @dataclass class PrioritisationRankRecorder : \"\"\" Record ranks for different types of prioritisation results. Attributes: index (int): The index representing the run. directory (Path): The result directory path. prioritisation_result (Union[GenePrioritisationResult, VariantPrioritisationResult, DiseasePrioritisationResult]): The prioritisation result object. run_comparison (defaultdict): The comparison dictionary to record ranks. \"\"\" index : int directory : Path prioritisation_result : Union [ GenePrioritisationResult , VariantPrioritisationResult , DiseasePrioritisationResult ] run_comparison : defaultdict def _record_gene_rank ( self ) -> None : \"\"\" Record gene prioritisation rank. This method updates the 'Gene' key in the run comparison dictionary with the gene information extracted from the correct prioritisation result. \"\"\" self . run_comparison [ self . index ][ \"Gene\" ] = self . prioritisation_result . gene def _record_variant_rank ( self ) -> None : \"\"\" Record variant prioritisation rank. This method updates the 'Variant' key in the run comparison dictionary with the variant information extracted from the correct prioritisation result. \"\"\" variant = self . prioritisation_result . variant self . run_comparison [ self . index ][ \"Variant\" ] = \"-\" . join ( [ variant . chrom , str ( variant . pos ), variant . ref , variant . alt ] ) def _record_disease_rank ( self ) -> None : \"\"\" Record disease prioritisation rank. This method updates the 'Disease' key in the run comparison dictionary with the disease information extracted from the correct prioritisation result. \"\"\" self . run_comparison [ self . index ][ \"Disease\" ] = self . prioritisation_result . 
disease . disease_identifier def record_rank ( self ) -> None : \"\"\" Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. \"\"\" self . run_comparison [ self . index ][ \"Phenopacket\" ] = self . prioritisation_result . phenopacket_path . name if type ( self . prioritisation_result ) is GenePrioritisationResult : self . _record_gene_rank () elif type ( self . prioritisation_result ) is VariantPrioritisationResult : self . _record_variant_rank () elif type ( self . prioritisation_result ) is DiseasePrioritisationResult : self . _record_disease_rank () self . run_comparison [ self . index ][ self . directory ] = self . prioritisation_result . rank record_rank () Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. Source code in src/pheval/analyse/prioritisation_rank_recorder.py 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 def record_rank ( self ) -> None : \"\"\" Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. \"\"\" self . run_comparison [ self . index ][ \"Phenopacket\" ] = self . prioritisation_result . phenopacket_path . name if type ( self . 
prioritisation_result ) is GenePrioritisationResult : self . _record_gene_rank () elif type ( self . prioritisation_result ) is VariantPrioritisationResult : self . _record_variant_rank () elif type ( self . prioritisation_result ) is DiseasePrioritisationResult : self . _record_disease_rank () self . run_comparison [ self . index ][ self . directory ] = self . prioritisation_result . rank","title":"Prioritisation rank recorder"},{"location":"api/pheval/analyse/prioritisation_rank_recorder/#src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder","text":"Record ranks for different types of prioritisation results. Attributes: Name Type Description index int The index representing the run. directory Path The result directory path. run_comparison defaultdict The comparison dictionary to record ranks. Source code in src/pheval/analyse/prioritisation_rank_recorder.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 @dataclass class PrioritisationRankRecorder : \"\"\" Record ranks for different types of prioritisation results. Attributes: index (int): The index representing the run. directory (Path): The result directory path. prioritisation_result (Union[GenePrioritisationResult, VariantPrioritisationResult, DiseasePrioritisationResult]): The prioritisation result object. run_comparison (defaultdict): The comparison dictionary to record ranks. \"\"\" index : int directory : Path prioritisation_result : Union [ GenePrioritisationResult , VariantPrioritisationResult , DiseasePrioritisationResult ] run_comparison : defaultdict def _record_gene_rank ( self ) -> None : \"\"\" Record gene prioritisation rank. This method updates the 'Gene' key in the run comparison dictionary with the gene information extracted from the correct prioritisation result. \"\"\" self . run_comparison [ self . 
index ][ \"Gene\" ] = self . prioritisation_result . gene def _record_variant_rank ( self ) -> None : \"\"\" Record variant prioritisation rank. This method updates the 'Variant' key in the run comparison dictionary with the variant information extracted from the correct prioritisation result. \"\"\" variant = self . prioritisation_result . variant self . run_comparison [ self . index ][ \"Variant\" ] = \"-\" . join ( [ variant . chrom , str ( variant . pos ), variant . ref , variant . alt ] ) def _record_disease_rank ( self ) -> None : \"\"\" Record disease prioritisation rank. This method updates the 'Disease' key in the run comparison dictionary with the disease information extracted from the correct prioritisation result. \"\"\" self . run_comparison [ self . index ][ \"Disease\" ] = self . prioritisation_result . disease . disease_identifier def record_rank ( self ) -> None : \"\"\" Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. \"\"\" self . run_comparison [ self . index ][ \"Phenopacket\" ] = self . prioritisation_result . phenopacket_path . name if type ( self . prioritisation_result ) is GenePrioritisationResult : self . _record_gene_rank () elif type ( self . prioritisation_result ) is VariantPrioritisationResult : self . _record_variant_rank () elif type ( self . prioritisation_result ) is DiseasePrioritisationResult : self . _record_disease_rank () self . run_comparison [ self . index ][ self . directory ] = self . prioritisation_result . 
rank","title":"PrioritisationRankRecorder"},{"location":"api/pheval/analyse/prioritisation_rank_recorder/#src.pheval.analyse.prioritisation_rank_recorder.PrioritisationRankRecorder.record_rank","text":"Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. Source code in src/pheval/analyse/prioritisation_rank_recorder.py 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 def record_rank ( self ) -> None : \"\"\" Record the prioritisation ranks for different runs. It assigns the prioritisation rank and associated details such as phenopacket name and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison dictionary for each respective run, allowing comparison and analysis of the ranks of correct results across different runs. \"\"\" self . run_comparison [ self . index ][ \"Phenopacket\" ] = self . prioritisation_result . phenopacket_path . name if type ( self . prioritisation_result ) is GenePrioritisationResult : self . _record_gene_rank () elif type ( self . prioritisation_result ) is VariantPrioritisationResult : self . _record_variant_rank () elif type ( self . prioritisation_result ) is DiseasePrioritisationResult : self . _record_disease_rank () self . run_comparison [ self . index ][ self . directory ] = self . prioritisation_result . rank","title":"record_rank"},{"location":"api/pheval/analyse/prioritisation_result_types/","text":"DiseasePrioritisationResult dataclass Store rank data for known diseases. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. disease ProbandDisease The proband disease. rank int The assigned rank for the disease. Defaults to 0. 
Source code in src/pheval/analyse/prioritisation_result_types.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 @dataclass class DiseasePrioritisationResult : \"\"\" Store rank data for known diseases. Attributes: phenopacket_path (Path): Path to the phenopacket. disease (ProbandDisease): The proband disease. rank (int): The assigned rank for the disease. Defaults to 0. \"\"\" phenopacket_path : Path disease : ProbandDisease rank : int = 0 GenePrioritisationResult dataclass Store rank data for causative genes. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. gene str The causative gene. rank int The assigned rank for the gene. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 7 8 9 10 11 12 13 14 15 16 17 18 19 20 @dataclass class GenePrioritisationResult : \"\"\" Store rank data for causative genes. Attributes: phenopacket_path (Path): Path to the phenopacket. gene (str): The causative gene. rank (int): The assigned rank for the gene. Defaults to 0. \"\"\" phenopacket_path : Path gene : str rank : int = 0 VariantPrioritisationResult dataclass Store rank data for variants. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. variant GenomicVariant The genomic variant. rank int The assigned rank for the variant. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 23 24 25 26 27 28 29 30 31 32 33 34 35 36 @dataclass class VariantPrioritisationResult : \"\"\" Store rank data for variants. Attributes: phenopacket_path (Path): Path to the phenopacket. variant (GenomicVariant): The genomic variant. rank (int): The assigned rank for the variant. Defaults to 0. \"\"\" phenopacket_path : Path variant : GenomicVariant rank : int = 0","title":"Prioritisation result types"},{"location":"api/pheval/analyse/prioritisation_result_types/#src.pheval.analyse.prioritisation_result_types.DiseasePrioritisationResult","text":"Store rank data for known diseases. 
Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. disease ProbandDisease The proband disease. rank int The assigned rank for the disease. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 @dataclass class DiseasePrioritisationResult : \"\"\" Store rank data for known diseases. Attributes: phenopacket_path (Path): Path to the phenopacket. disease (ProbandDisease): The proband disease. rank (int): The assigned rank for the disease. Defaults to 0. \"\"\" phenopacket_path : Path disease : ProbandDisease rank : int = 0","title":"DiseasePrioritisationResult"},{"location":"api/pheval/analyse/prioritisation_result_types/#src.pheval.analyse.prioritisation_result_types.GenePrioritisationResult","text":"Store rank data for causative genes. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. gene str The causative gene. rank int The assigned rank for the gene. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 7 8 9 10 11 12 13 14 15 16 17 18 19 20 @dataclass class GenePrioritisationResult : \"\"\" Store rank data for causative genes. Attributes: phenopacket_path (Path): Path to the phenopacket. gene (str): The causative gene. rank (int): The assigned rank for the gene. Defaults to 0. \"\"\" phenopacket_path : Path gene : str rank : int = 0","title":"GenePrioritisationResult"},{"location":"api/pheval/analyse/prioritisation_result_types/#src.pheval.analyse.prioritisation_result_types.VariantPrioritisationResult","text":"Store rank data for variants. Attributes: Name Type Description phenopacket_path Path Path to the phenopacket. variant GenomicVariant The genomic variant. rank int The assigned rank for the variant. Defaults to 0. Source code in src/pheval/analyse/prioritisation_result_types.py 23 24 25 26 27 28 29 30 31 32 33 34 35 36 @dataclass class VariantPrioritisationResult : \"\"\" Store rank data for variants. 
Attributes: phenopacket_path (Path): Path to the phenopacket. variant (GenomicVariant): The genomic variant. rank (int): The assigned rank for the variant. Defaults to 0. \"\"\" phenopacket_path : Path variant : GenomicVariant rank : int = 0","title":"VariantPrioritisationResult"},{"location":"api/pheval/analyse/rank_stats/","text":"RankStats dataclass Store statistics related to ranking. Attributes: Name Type Description top int Count of top-ranked matches. top3 int Count of matches within the top 3 ranks. top5 int Count of matches within the top 5 ranks. top10 int Count of matches within the top 10 ranks. found int Count of found matches. total int Total count of matches. reciprocal_ranks List [ float ] List of reciprocal ranks. relevant_ranks List [ List [ int ]] Nested list of ranks for the known entities for all cases in a run. mrr float Mean Reciprocal Rank (MRR). Defaults to None. Source code in src/pheval/analyse/rank_stats.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 @dataclass class RankStats : \"\"\"Store statistics related to ranking. 
Attributes: top (int): Count of top-ranked matches. top3 (int): Count of matches within the top 3 ranks. top5 (int): Count of matches within the top 5 ranks. top10 (int): Count of matches within the top 10 ranks. found (int): Count of found matches. total (int): Total count of matches. reciprocal_ranks (List[float]): List of reciprocal ranks. relevant_ranks List[List[int]]: Nested list of ranks for the known entities for all cases in a run. mrr (float): Mean Reciprocal Rank (MRR). Defaults to None. \"\"\" top : int = 0 top3 : int = 0 top5 : int = 0 top10 : int = 0 found : int = 0 total : int = 0 reciprocal_ranks : List = field ( default_factory = list ) relevant_result_ranks : List [ List [ int ]] = field ( default_factory = list ) mrr : float = None def add_rank ( self , rank : int ) -> None : \"\"\" Add rank for matched result. Args: rank (int): The rank value to be added. Notes: This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. \"\"\" self . reciprocal_ranks . append ( 1 / rank ) self . found += 1 if rank == 1 : self . top += 1 if rank != \"\" and rank <= 3 : self . top3 += 1 if rank != \"\" and rank <= 5 : self . top5 += 1 if rank != \"\" and rank <= 10 : self . top10 += 1 def percentage_rank ( self , value : int ) -> float : \"\"\" Calculate the percentage rank. Args: value (int): The value for which the percentage rank needs to be calculated. Returns: float: The calculated percentage rank based on the provided value and the total count. \"\"\" return 100 * value / self . total def percentage_top ( self ) -> float : \"\"\" Calculate the percentage of top matches. Returns: float: The percentage of top matches compared to the total count. \"\"\" return self . percentage_rank ( self . 
top ) def percentage_top3 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 3. Returns: float: The percentage of matches within the top 3 compared to the total count. \"\"\" return self . percentage_rank ( self . top3 ) def percentage_top5 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 5. Returns: float: The percentage of matches within the top 5 compared to the total count. \"\"\" return self . percentage_rank ( self . top5 ) def percentage_top10 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 10. Returns: float: The percentage of matches within the top 10 compared to the total count. \"\"\" return self . percentage_rank ( self . top10 ) def percentage_found ( self ) -> float : \"\"\" Calculate the percentage of matches found. Returns: float: The percentage of matches found compared to the total count. \"\"\" return self . percentage_rank ( self . found ) @staticmethod def percentage_difference ( percentage_value_1 : float , percentage_value_2 : float ) -> float : \"\"\" Calculate the percentage difference between two percentage values. Args: percentage_value_1 (float): The first percentage value. percentage_value_2 (float): The second percentage value. Returns: float: The difference between the two percentage values. \"\"\" return percentage_value_1 - percentage_value_2 def mean_reciprocal_rank ( self ) -> float : \"\"\" Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: float: The calculated Mean Reciprocal Rank. \"\"\" if len ( self . reciprocal_ranks ) != self . total : missing_cases = self . total - self . found self . reciprocal_ranks . extend ([ 0 ] * missing_cases ) return mean ( self . 
reciprocal_ranks ) return mean ( self . reciprocal_ranks ) def return_mean_reciprocal_rank ( self ) -> float : \"\"\" Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: float: The Mean Reciprocal Rank value. \"\"\" if self . mrr is not None : return self . mrr else : return self . mean_reciprocal_rank () def precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Args: k (int): The number of top predictions to consider. Returns: float: The precision at k, ranging from 0.0 to 1.0. A higher precision indicates a better performance in identifying relevant items in the top-k predictions. \"\"\" k_attr = getattr ( self , f \"top { k } \" ) if k > 1 else self . top return k_attr / ( self . total * k ) @staticmethod def _average_precision_at_k ( number_of_relevant_entities_at_k : int , precision_at_k : float ) -> float : \"\"\" Calculate the Average Precision at k. Average Precision at k (AP@k) is a metric used to evaluate the precision of a ranked retrieval system. It measures the precision at each relevant position up to k and takes the average. Args: number_of_relevant_entities_at_k (int): The count of relevant entities in the top-k predictions. precision_at_k (float): The precision at k - the sum of the precision values at each relevant position. Returns: float: The Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better precision in the top-k predictions. 
\"\"\" return ( ( 1 / number_of_relevant_entities_at_k ) * precision_at_k if number_of_relevant_entities_at_k > 0 else 0.0 ) def mean_average_precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Args: k (int): The number of top predictions to consider for precision calculation. Returns: float: The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. \"\"\" cumulative_average_precision_scores = 0 for result_ranks in self . relevant_result_ranks : precision_at_k , number_of_relevant_entities_at_k = 0 , 0 for rank in result_ranks : if 0 < rank <= k : number_of_relevant_entities_at_k += 1 precision_at_k += number_of_relevant_entities_at_k / rank cumulative_average_precision_scores += self . _average_precision_at_k ( number_of_relevant_entities_at_k , precision_at_k ) return ( 1 / self . total ) * cumulative_average_precision_scores def f_beta_score_at_k ( self , percentage_at_k : float , k : int ) -> float : \"\"\" Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Args: percentage_at_k (float): The percentage of true positive predictions within the top-k. k (int): The number of top predictions to consider. Returns: float: The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. \"\"\" precision = self . 
precision_at_k ( k ) recall_at_k = percentage_at_k / 100 return ( ( 2 * precision * recall_at_k ) / ( precision + recall_at_k ) if ( precision + recall_at_k ) > 0 else 0 ) def mean_normalised_discounted_cumulative_gain ( self , k : int ) -> float : \"\"\" Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Args: k (int): The rank cutoff for calculating NDCG. Returns: float: The mean NDCG score across all query results. \"\"\" ndcg_scores = [] for result_ranks in self . relevant_result_ranks : result_ranks = [ rank for rank in result_ranks if rank <= k ] result_ranks = [ 3 if i in result_ranks else 0 for i in range ( k )] ideal_ranking = sorted ( result_ranks , reverse = True ) ndcg_scores . append ( ndcg_score ( np . asarray ([ ideal_ranking ]), np . asarray ([ result_ranks ]))) return np . mean ( ndcg_scores ) add_rank ( rank ) Add rank for matched result. Parameters: Name Type Description Default rank int The rank value to be added. required Notes This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. Source code in src/pheval/analyse/rank_stats.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 def add_rank ( self , rank : int ) -> None : \"\"\" Add rank for matched result. Args: rank (int): The rank value to be added. Notes: This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. 
\"\"\" self . reciprocal_ranks . append ( 1 / rank ) self . found += 1 if rank == 1 : self . top += 1 if rank != \"\" and rank <= 3 : self . top3 += 1 if rank != \"\" and rank <= 5 : self . top5 += 1 if rank != \"\" and rank <= 10 : self . top10 += 1 f_beta_score_at_k ( percentage_at_k , k ) Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Parameters: Name Type Description Default percentage_at_k float The percentage of true positive predictions within the top-k. required k int The number of top predictions to consider. required Returns: Name Type Description float float The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. Source code in src/pheval/analyse/rank_stats.py 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 def f_beta_score_at_k ( self , percentage_at_k : float , k : int ) -> float : \"\"\" Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Args: percentage_at_k (float): The percentage of true positive predictions within the top-k. k (int): The number of top predictions to consider. Returns: float: The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. \"\"\" precision = self . 
precision_at_k ( k ) recall_at_k = percentage_at_k / 100 return ( ( 2 * precision * recall_at_k ) / ( precision + recall_at_k ) if ( precision + recall_at_k ) > 0 else 0 ) mean_average_precision_at_k ( k ) Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Parameters: Name Type Description Default k int The number of top predictions to consider for precision calculation. required Returns: Name Type Description float float The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. Source code in src/pheval/analyse/rank_stats.py 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 def mean_average_precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Args: k (int): The number of top predictions to consider for precision calculation. Returns: float: The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. \"\"\" cumulative_average_precision_scores = 0 for result_ranks in self . relevant_result_ranks : precision_at_k , number_of_relevant_entities_at_k = 0 , 0 for rank in result_ranks : if 0 < rank <= k : number_of_relevant_entities_at_k += 1 precision_at_k += number_of_relevant_entities_at_k / rank cumulative_average_precision_scores += self . _average_precision_at_k ( number_of_relevant_entities_at_k , precision_at_k ) return ( 1 / self . 
total ) * cumulative_average_precision_scores mean_normalised_discounted_cumulative_gain ( k ) Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Parameters: Name Type Description Default k int The rank cutoff for calculating NDCG. required Returns: Name Type Description float float The mean NDCG score across all query results. Source code in src/pheval/analyse/rank_stats.py 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 def mean_normalised_discounted_cumulative_gain ( self , k : int ) -> float : \"\"\" Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Args: k (int): The rank cutoff for calculating NDCG. Returns: float: The mean NDCG score across all query results. \"\"\" ndcg_scores = [] for result_ranks in self . relevant_result_ranks : result_ranks = [ rank for rank in result_ranks if rank <= k ] result_ranks = [ 3 if i in result_ranks else 0 for i in range ( k )] ideal_ranking = sorted ( result_ranks , reverse = True ) ndcg_scores . append ( ndcg_score ( np . asarray ([ ideal_ranking ]), np . asarray ([ result_ranks ]))) return np . mean ( ndcg_scores ) mean_reciprocal_rank () Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: Name Type Description float float The calculated Mean Reciprocal Rank. 
Source code in src/pheval/analyse/rank_stats.py 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 def mean_reciprocal_rank ( self ) -> float : \"\"\" Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: float: The calculated Mean Reciprocal Rank. \"\"\" if len ( self . reciprocal_ranks ) != self . total : missing_cases = self . total - self . found self . reciprocal_ranks . extend ([ 0 ] * missing_cases ) return mean ( self . reciprocal_ranks ) return mean ( self . reciprocal_ranks ) percentage_difference ( percentage_value_1 , percentage_value_2 ) staticmethod Calculate the percentage difference between two percentage values. Parameters: Name Type Description Default percentage_value_1 float The first percentage value. required percentage_value_2 float The second percentage value. required Returns: Name Type Description float float The difference between the two percentage values. Source code in src/pheval/analyse/rank_stats.py 120 121 122 123 124 125 126 127 128 129 130 131 132 @staticmethod def percentage_difference ( percentage_value_1 : float , percentage_value_2 : float ) -> float : \"\"\" Calculate the percentage difference between two percentage values. Args: percentage_value_1 (float): The first percentage value. percentage_value_2 (float): The second percentage value. Returns: float: The difference between the two percentage values. \"\"\" return percentage_value_1 - percentage_value_2 percentage_found () Calculate the percentage of matches found. Returns: Name Type Description float float The percentage of matches found compared to the total count. 
Source code in src/pheval/analyse/rank_stats.py 111 112 113 114 115 116 117 118 def percentage_found ( self ) -> float : \"\"\" Calculate the percentage of matches found. Returns: float: The percentage of matches found compared to the total count. \"\"\" return self . percentage_rank ( self . found ) percentage_rank ( value ) Calculate the percentage rank. Parameters: Name Type Description Default value int The value for which the percentage rank needs to be calculated. required Returns: Name Type Description float float The calculated percentage rank based on the provided value and the total count. Source code in src/pheval/analyse/rank_stats.py 63 64 65 66 67 68 69 70 71 72 73 def percentage_rank ( self , value : int ) -> float : \"\"\" Calculate the percentage rank. Args: value (int): The value for which the percentage rank needs to be calculated. Returns: float: The calculated percentage rank based on the provided value and the total count. \"\"\" return 100 * value / self . total percentage_top () Calculate the percentage of top matches. Returns: Name Type Description float float The percentage of top matches compared to the total count. Source code in src/pheval/analyse/rank_stats.py 75 76 77 78 79 80 81 82 def percentage_top ( self ) -> float : \"\"\" Calculate the percentage of top matches. Returns: float: The percentage of top matches compared to the total count. \"\"\" return self . percentage_rank ( self . top ) percentage_top10 () Calculate the percentage of matches within the top 10. Returns: Name Type Description float float The percentage of matches within the top 10 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 102 103 104 105 106 107 108 109 def percentage_top10 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 10. Returns: float: The percentage of matches within the top 10 compared to the total count. \"\"\" return self . percentage_rank ( self . 
top10 ) percentage_top3 () Calculate the percentage of matches within the top 3. Returns: Name Type Description float float The percentage of matches within the top 3 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 84 85 86 87 88 89 90 91 def percentage_top3 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 3. Returns: float: The percentage of matches within the top 3 compared to the total count. \"\"\" return self . percentage_rank ( self . top3 ) percentage_top5 () Calculate the percentage of matches within the top 5. Returns: Name Type Description float float The percentage of matches within the top 5 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 93 94 95 96 97 98 99 100 def percentage_top5 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 5. Returns: float: The percentage of matches within the top 5 compared to the total count. \"\"\" return self . percentage_rank ( self . top5 ) precision_at_k ( k ) Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Parameters: Name Type Description Default k int The number of top predictions to consider. required Returns: Name Type Description float float The precision at k, ranging from 0.0 to 1.0. float A higher precision indicates a better performance in identifying relevant items in the top-k predictions. Source code in src/pheval/analyse/rank_stats.py 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 def precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Args: k (int): The number of top predictions to consider. 
Returns: float: The precision at k, ranging from 0.0 to 1.0. A higher precision indicates a better performance in identifying relevant items in the top-k predictions. \"\"\" k_attr = getattr ( self , f \"top { k } \" ) if k > 1 else self . top return k_attr / ( self . total * k ) return_mean_reciprocal_rank () Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: Name Type Description float float The Mean Reciprocal Rank value. Source code in src/pheval/analyse/rank_stats.py 153 154 155 156 157 158 159 160 161 162 163 164 165 166 def return_mean_reciprocal_rank ( self ) -> float : \"\"\" Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: float: The Mean Reciprocal Rank value. \"\"\" if self . mrr is not None : return self . mrr else : return self . mean_reciprocal_rank () RankStatsWriter Class for writing the rank stats to a file. 
Source code in src/pheval/analyse/rank_stats.py 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 class RankStatsWriter : \"\"\"Class for writing the rank stats to a file.\"\"\" def __init__ ( self , file : Path ): \"\"\" Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written \"\"\" self . file = open ( file , \"w\" ) self . writer = csv . writer ( self . file , delimiter = \" \\t \" ) self . writer . writerow ( [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , \"percentage_top\" , \"percentage_top3\" , \"percentage_top5\" , \"percentage_top10\" , \"percentage_found\" , \"precision@1\" , \"precision@3\" , \"precision@5\" , \"precision@10\" , \"MAP@1\" , \"MAP@3\" , \"MAP@5\" , \"MAP@10\" , \"f_beta_score@1\" , \"f_beta_score@3\" , \"f_beta_score@5\" , \"f_beta_score@10\" , \"NDCG@3\" , \"NDCG@5\" , \"NDCG@10\" , \"true_positives\" , \"false_positives\" , \"true_negatives\" , \"false_negatives\" , \"sensitivity\" , \"specificity\" , \"precision\" , \"negative_predictive_value\" , \"false_positive_rate\" , \"false_discovery_rate\" , \"false_negative_rate\" , \"accuracy\" , \"f1_score\" , \"matthews_correlation_coefficient\" , ] ) def write_row ( self , directory : Path , rank_stats : RankStats , binary_classification : BinaryClassificationStats , ) -> None : \"\"\" Write summary rank statistics row for a run to the file. 
Args: directory (Path): Path to the results directory corresponding to the run rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run Raises: IOError: If there is an error writing to the file. \"\"\" try : self . writer . writerow ( [ directory , rank_stats . top , rank_stats . top3 , rank_stats . top5 , rank_stats . top10 , rank_stats . found , rank_stats . total , rank_stats . mean_reciprocal_rank (), rank_stats . percentage_top (), rank_stats . percentage_top3 (), rank_stats . percentage_top5 (), rank_stats . percentage_top10 (), rank_stats . percentage_found (), rank_stats . precision_at_k ( 1 ), rank_stats . precision_at_k ( 3 ), rank_stats . precision_at_k ( 5 ), rank_stats . precision_at_k ( 10 ), rank_stats . mean_average_precision_at_k ( 1 ), rank_stats . mean_average_precision_at_k ( 3 ), rank_stats . mean_average_precision_at_k ( 5 ), rank_stats . mean_average_precision_at_k ( 10 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top (), 1 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top3 (), 3 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top5 (), 5 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top10 (), 10 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 3 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 5 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 10 ), binary_classification . true_positives , binary_classification . false_positives , binary_classification . true_negatives , binary_classification . false_negatives , binary_classification . sensitivity (), binary_classification . specificity (), binary_classification . precision (), binary_classification . negative_predictive_value (), binary_classification . false_positive_rate (), binary_classification . false_discovery_rate (), binary_classification . false_negative_rate (), binary_classification . accuracy (), binary_classification . 
f1_score (), binary_classification . matthews_correlation_coefficient (), ] ) except IOError : print ( \"Error writing \" , self . file ) def close ( self ) -> None : \"\"\" Close the file used for writing rank statistics. Raises: IOError: If there's an error while closing the file. \"\"\" try : self . file . close () except IOError : print ( \"Error closing \" , self . file ) __init__ ( file ) Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written Source code in src/pheval/analyse/rank_stats.py 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 def __init__ ( self , file : Path ): \"\"\" Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written \"\"\" self . file = open ( file , \"w\" ) self . writer = csv . writer ( self . file , delimiter = \" \\t \" ) self . writer . writerow ( [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , \"percentage_top\" , \"percentage_top3\" , \"percentage_top5\" , \"percentage_top10\" , \"percentage_found\" , \"precision@1\" , \"precision@3\" , \"precision@5\" , \"precision@10\" , \"MAP@1\" , \"MAP@3\" , \"MAP@5\" , \"MAP@10\" , \"f_beta_score@1\" , \"f_beta_score@3\" , \"f_beta_score@5\" , \"f_beta_score@10\" , \"NDCG@3\" , \"NDCG@5\" , \"NDCG@10\" , \"true_positives\" , \"false_positives\" , \"true_negatives\" , \"false_negatives\" , \"sensitivity\" , \"specificity\" , \"precision\" , \"negative_predictive_value\" , \"false_positive_rate\" , \"false_discovery_rate\" , \"false_negative_rate\" , \"accuracy\" , \"f1_score\" , \"matthews_correlation_coefficient\" , ] ) close () Close the file used for writing rank statistics. Raises: Type Description IOError If there's an error while closing the file. 
Source code in src/pheval/analyse/rank_stats.py 404 405 406 407 408 409 410 411 412 413 414 def close ( self ) -> None : \"\"\" Close the file used for writing rank statistics. Raises: IOError: If there's an error while closing the file. \"\"\" try : self . file . close () except IOError : print ( \"Error closing \" , self . file ) write_row ( directory , rank_stats , binary_classification ) Write summary rank statistics row for a run to the file. Parameters: Name Type Description Default directory Path Path to the results directory corresponding to the run required rank_stats RankStats RankStats instance containing rank statistics corresponding to the run required Raises: Type Description IOError If there is an error writing to the file. Source code in src/pheval/analyse/rank_stats.py 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 def write_row ( self , directory : Path , rank_stats : RankStats , binary_classification : BinaryClassificationStats , ) -> None : \"\"\" Write summary rank statistics row for a run to the file. Args: directory (Path): Path to the results directory corresponding to the run rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run Raises: IOError: If there is an error writing to the file. \"\"\" try : self . writer . writerow ( [ directory , rank_stats . top , rank_stats . top3 , rank_stats . top5 , rank_stats . top10 , rank_stats . found , rank_stats . total , rank_stats . mean_reciprocal_rank (), rank_stats . percentage_top (), rank_stats . percentage_top3 (), rank_stats . percentage_top5 (), rank_stats . percentage_top10 (), rank_stats . percentage_found (), rank_stats . precision_at_k ( 1 ), rank_stats . precision_at_k ( 3 ), rank_stats . precision_at_k ( 5 ), rank_stats . 
precision_at_k ( 10 ), rank_stats . mean_average_precision_at_k ( 1 ), rank_stats . mean_average_precision_at_k ( 3 ), rank_stats . mean_average_precision_at_k ( 5 ), rank_stats . mean_average_precision_at_k ( 10 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top (), 1 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top3 (), 3 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top5 (), 5 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top10 (), 10 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 3 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 5 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 10 ), binary_classification . true_positives , binary_classification . false_positives , binary_classification . true_negatives , binary_classification . false_negatives , binary_classification . sensitivity (), binary_classification . specificity (), binary_classification . precision (), binary_classification . negative_predictive_value (), binary_classification . false_positive_rate (), binary_classification . false_discovery_rate (), binary_classification . false_negative_rate (), binary_classification . accuracy (), binary_classification . f1_score (), binary_classification . matthews_correlation_coefficient (), ] ) except IOError : print ( \"Error writing \" , self . file )","title":"Rank stats"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats","text":"Store statistics related to ranking. Attributes: Name Type Description top int Count of top-ranked matches. top3 int Count of matches within the top 3 ranks. top5 int Count of matches within the top 5 ranks. top10 int Count of matches within the top 10 ranks. found int Count of found matches. total int Total count of matches. reciprocal_ranks List [ float ] List of reciprocal ranks. relevant_ranks List [ List [ int ]] Nested list of ranks for the known entities for all cases in a run. 
mrr float Mean Reciprocal Rank (MRR). Defaults to None. Source code in src/pheval/analyse/rank_stats.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 @dataclass class RankStats : \"\"\"Store statistics related to ranking. Attributes: top (int): Count of top-ranked matches. top3 (int): Count of matches within the top 3 ranks. top5 (int): Count of matches within the top 5 ranks. top10 (int): Count of matches within the top 10 ranks. found (int): Count of found matches. total (int): Total count of matches. reciprocal_ranks (List[float]): List of reciprocal ranks. relevant_ranks List[List[int]]: Nested list of ranks for the known entities for all cases in a run. mrr (float): Mean Reciprocal Rank (MRR). Defaults to None. \"\"\" top : int = 0 top3 : int = 0 top5 : int = 0 top10 : int = 0 found : int = 0 total : int = 0 reciprocal_ranks : List = field ( default_factory = list ) relevant_result_ranks : List [ List [ int ]] = field ( default_factory = list ) mrr : float = None def add_rank ( self , rank : int ) -> None : \"\"\" Add rank for matched result. 
Args: rank (int): The rank value to be added. Notes: This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. \"\"\" self . reciprocal_ranks . append ( 1 / rank ) self . found += 1 if rank == 1 : self . top += 1 if rank != \"\" and rank <= 3 : self . top3 += 1 if rank != \"\" and rank <= 5 : self . top5 += 1 if rank != \"\" and rank <= 10 : self . top10 += 1 def percentage_rank ( self , value : int ) -> float : \"\"\" Calculate the percentage rank. Args: value (int): The value for which the percentage rank needs to be calculated. Returns: float: The calculated percentage rank based on the provided value and the total count. \"\"\" return 100 * value / self . total def percentage_top ( self ) -> float : \"\"\" Calculate the percentage of top matches. Returns: float: The percentage of top matches compared to the total count. \"\"\" return self . percentage_rank ( self . top ) def percentage_top3 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 3. Returns: float: The percentage of matches within the top 3 compared to the total count. \"\"\" return self . percentage_rank ( self . top3 ) def percentage_top5 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 5. Returns: float: The percentage of matches within the top 5 compared to the total count. \"\"\" return self . percentage_rank ( self . top5 ) def percentage_top10 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 10. Returns: float: The percentage of matches within the top 10 compared to the total count. \"\"\" return self . percentage_rank ( self . top10 ) def percentage_found ( self ) -> float : \"\"\" Calculate the percentage of matches found. 
Returns: float: The percentage of matches found compared to the total count. \"\"\" return self . percentage_rank ( self . found ) @staticmethod def percentage_difference ( percentage_value_1 : float , percentage_value_2 : float ) -> float : \"\"\" Calculate the percentage difference between two percentage values. Args: percentage_value_1 (float): The first percentage value. percentage_value_2 (float): The second percentage value. Returns: float: The difference between the two percentage values. \"\"\" return percentage_value_1 - percentage_value_2 def mean_reciprocal_rank ( self ) -> float : \"\"\" Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: float: The calculated Mean Reciprocal Rank. \"\"\" if len ( self . reciprocal_ranks ) != self . total : missing_cases = self . total - self . found self . reciprocal_ranks . extend ([ 0 ] * missing_cases ) return mean ( self . reciprocal_ranks ) return mean ( self . reciprocal_ranks ) def return_mean_reciprocal_rank ( self ) -> float : \"\"\" Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: float: The Mean Reciprocal Rank value. \"\"\" if self . mrr is not None : return self . mrr else : return self . mean_reciprocal_rank () def precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Args: k (int): The number of top predictions to consider. 
Returns: float: The precision at k, ranging from 0.0 to 1.0. A higher precision indicates a better performance in identifying relevant items in the top-k predictions. \"\"\" k_attr = getattr ( self , f \"top { k } \" ) if k > 1 else self . top return k_attr / ( self . total * k ) @staticmethod def _average_precision_at_k ( number_of_relevant_entities_at_k : int , precision_at_k : float ) -> float : \"\"\" Calculate the Average Precision at k. Average Precision at k (AP@k) is a metric used to evaluate the precision of a ranked retrieval system. It measures the precision at each relevant position up to k and takes the average. Args: number_of_relevant_entities_at_k (int): The count of relevant entities in the top-k predictions. precision_at_k (float): The precision at k - the sum of the precision values at each relevant position. Returns: float: The Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better precision in the top-k predictions. \"\"\" return ( ( 1 / number_of_relevant_entities_at_k ) * precision_at_k if number_of_relevant_entities_at_k > 0 else 0.0 ) def mean_average_precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Args: k (int): The number of top predictions to consider for precision calculation. Returns: float: The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. \"\"\" cumulative_average_precision_scores = 0 for result_ranks in self . relevant_result_ranks : precision_at_k , number_of_relevant_entities_at_k = 0 , 0 for rank in result_ranks : if 0 < rank <= k : number_of_relevant_entities_at_k += 1 precision_at_k += number_of_relevant_entities_at_k / rank cumulative_average_precision_scores += self . 
_average_precision_at_k ( number_of_relevant_entities_at_k , precision_at_k ) return ( 1 / self . total ) * cumulative_average_precision_scores def f_beta_score_at_k ( self , percentage_at_k : float , k : int ) -> float : \"\"\" Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Args: percentage_at_k (float): The percentage of true positive predictions within the top-k. k (int): The number of top predictions to consider. Returns: float: The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. \"\"\" precision = self . precision_at_k ( k ) recall_at_k = percentage_at_k / 100 return ( ( 2 * precision * recall_at_k ) / ( precision + recall_at_k ) if ( precision + recall_at_k ) > 0 else 0 ) def mean_normalised_discounted_cumulative_gain ( self , k : int ) -> float : \"\"\" Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Args: k (int): The rank cutoff for calculating NDCG. Returns: float: The mean NDCG score across all query results. \"\"\" ndcg_scores = [] for result_ranks in self . relevant_result_ranks : result_ranks = [ rank for rank in result_ranks if rank <= k ] result_ranks = [ 3 if i in result_ranks else 0 for i in range ( k )] ideal_ranking = sorted ( result_ranks , reverse = True ) ndcg_scores . append ( ndcg_score ( np . asarray ([ ideal_ranking ]), np . asarray ([ result_ranks ]))) return np . 
mean ( ndcg_scores )","title":"RankStats"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.add_rank","text":"Add rank for matched result. Parameters: Name Type Description Default rank int The rank value to be added. required Notes This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. Source code in src/pheval/analyse/rank_stats.py 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 def add_rank ( self , rank : int ) -> None : \"\"\" Add rank for matched result. Args: rank (int): The rank value to be added. Notes: This method updates the internal attributes of the RankStats object based on the provided rank value. It calculates various statistics such as the count of top ranks (1, 3, 5, and 10), the total number of ranks found,and the reciprocal rank. This function modifies the object's state by updating the internal attributes. \"\"\" self . reciprocal_ranks . append ( 1 / rank ) self . found += 1 if rank == 1 : self . top += 1 if rank != \"\" and rank <= 3 : self . top3 += 1 if rank != \"\" and rank <= 5 : self . top5 += 1 if rank != \"\" and rank <= 10 : self . top10 += 1","title":"add_rank"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.f_beta_score_at_k","text":"Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. 
Parameters: Name Type Description Default percentage_at_k float The percentage of true positive predictions within the top-k. required k int The number of top predictions to consider. required Returns: Name Type Description float float The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. Source code in src/pheval/analyse/rank_stats.py 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 def f_beta_score_at_k ( self , percentage_at_k : float , k : int ) -> float : \"\"\" Calculate the F-beta score at k. The F-beta score is a metric that combines precision and recall, with beta controlling the emphasis on precision. The Beta value is set to the value of 1 to allow for equal weighting for both precision and recall. This method computes the F-beta score at a specific percentage threshold within the top-k predictions. Args: percentage_at_k (float): The percentage of true positive predictions within the top-k. k (int): The number of top predictions to consider. Returns: float: The F-beta score at k, ranging from 0.0 to 1.0. A higher score indicates better trade-off between precision and recall. \"\"\" precision = self . precision_at_k ( k ) recall_at_k = percentage_at_k / 100 return ( ( 2 * precision * recall_at_k ) / ( precision + recall_at_k ) if ( precision + recall_at_k ) > 0 else 0 )","title":"f_beta_score_at_k"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.mean_average_precision_at_k","text":"Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Parameters: Name Type Description Default k int The number of top predictions to consider for precision calculation. 
required Returns: Name Type Description float float The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. Source code in src/pheval/analyse/rank_stats.py 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 def mean_average_precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the Mean Average Precision at k. Mean Average Precision at k (MAP@k) is a performance metric for ranked data. It calculates the average precision at k for each result rank and then takes the mean across all queries. Args: k (int): The number of top predictions to consider for precision calculation. Returns: float: The Mean Average Precision at k, ranging from 0.0 to 1.0. A higher value indicates better performance in ranking relevant entities higher in the predictions. \"\"\" cumulative_average_precision_scores = 0 for result_ranks in self . relevant_result_ranks : precision_at_k , number_of_relevant_entities_at_k = 0 , 0 for rank in result_ranks : if 0 < rank <= k : number_of_relevant_entities_at_k += 1 precision_at_k += number_of_relevant_entities_at_k / rank cumulative_average_precision_scores += self . _average_precision_at_k ( number_of_relevant_entities_at_k , precision_at_k ) return ( 1 / self . total ) * cumulative_average_precision_scores","title":"mean_average_precision_at_k"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.mean_normalised_discounted_cumulative_gain","text":"Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Parameters: Name Type Description Default k int The rank cutoff for calculating NDCG. required Returns: Name Type Description float float The mean NDCG score across all query results. 
Source code in src/pheval/analyse/rank_stats.py 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 def mean_normalised_discounted_cumulative_gain ( self , k : int ) -> float : \"\"\" Calculate the mean Normalised Discounted Cumulative Gain (NDCG) for a given rank cutoff. NDCG measures the effectiveness of a ranking by considering both the relevance and the order of items. Args: k (int): The rank cutoff for calculating NDCG. Returns: float: The mean NDCG score across all query results. \"\"\" ndcg_scores = [] for result_ranks in self . relevant_result_ranks : result_ranks = [ rank for rank in result_ranks if rank <= k ] result_ranks = [ 3 if i in result_ranks else 0 for i in range ( k )] ideal_ranking = sorted ( result_ranks , reverse = True ) ndcg_scores . append ( ndcg_score ( np . asarray ([ ideal_ranking ]), np . asarray ([ result_ranks ]))) return np . mean ( ndcg_scores )","title":"mean_normalised_discounted_cumulative_gain"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.mean_reciprocal_rank","text":"Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: Name Type Description float float The calculated Mean Reciprocal Rank. Source code in src/pheval/analyse/rank_stats.py 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 def mean_reciprocal_rank ( self ) -> float : \"\"\" Calculate the Mean Reciprocal Rank (MRR) for the stored ranks. The Mean Reciprocal Rank is computed as the mean of the reciprocal ranks for the found cases. If the total number of cases differs from the number of found cases, this method extends the reciprocal ranks list with zeroes for missing cases. Returns: float: The calculated Mean Reciprocal Rank. 
\"\"\" if len ( self . reciprocal_ranks ) != self . total : missing_cases = self . total - self . found self . reciprocal_ranks . extend ([ 0 ] * missing_cases ) return mean ( self . reciprocal_ranks ) return mean ( self . reciprocal_ranks )","title":"mean_reciprocal_rank"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_difference","text":"Calculate the percentage difference between two percentage values. Parameters: Name Type Description Default percentage_value_1 float The first percentage value. required percentage_value_2 float The second percentage value. required Returns: Name Type Description float float The difference between the two percentage values. Source code in src/pheval/analyse/rank_stats.py 120 121 122 123 124 125 126 127 128 129 130 131 132 @staticmethod def percentage_difference ( percentage_value_1 : float , percentage_value_2 : float ) -> float : \"\"\" Calculate the percentage difference between two percentage values. Args: percentage_value_1 (float): The first percentage value. percentage_value_2 (float): The second percentage value. Returns: float: The difference between the two percentage values. \"\"\" return percentage_value_1 - percentage_value_2","title":"percentage_difference"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_found","text":"Calculate the percentage of matches found. Returns: Name Type Description float float The percentage of matches found compared to the total count. Source code in src/pheval/analyse/rank_stats.py 111 112 113 114 115 116 117 118 def percentage_found ( self ) -> float : \"\"\" Calculate the percentage of matches found. Returns: float: The percentage of matches found compared to the total count. \"\"\" return self . percentage_rank ( self . 
found )","title":"percentage_found"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_rank","text":"Calculate the percentage rank. Parameters: Name Type Description Default value int The value for which the percentage rank needs to be calculated. required Returns: Name Type Description float float The calculated percentage rank based on the provided value and the total count. Source code in src/pheval/analyse/rank_stats.py 63 64 65 66 67 68 69 70 71 72 73 def percentage_rank ( self , value : int ) -> float : \"\"\" Calculate the percentage rank. Args: value (int): The value for which the percentage rank needs to be calculated. Returns: float: The calculated percentage rank based on the provided value and the total count. \"\"\" return 100 * value / self . total","title":"percentage_rank"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_top","text":"Calculate the percentage of top matches. Returns: Name Type Description float float The percentage of top matches compared to the total count. Source code in src/pheval/analyse/rank_stats.py 75 76 77 78 79 80 81 82 def percentage_top ( self ) -> float : \"\"\" Calculate the percentage of top matches. Returns: float: The percentage of top matches compared to the total count. \"\"\" return self . percentage_rank ( self . top )","title":"percentage_top"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_top10","text":"Calculate the percentage of matches within the top 10. Returns: Name Type Description float float The percentage of matches within the top 10 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 102 103 104 105 106 107 108 109 def percentage_top10 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 10. Returns: float: The percentage of matches within the top 10 compared to the total count. \"\"\" return self . 
percentage_rank ( self . top10 )","title":"percentage_top10"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_top3","text":"Calculate the percentage of matches within the top 3. Returns: Name Type Description float float The percentage of matches within the top 3 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 84 85 86 87 88 89 90 91 def percentage_top3 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 3. Returns: float: The percentage of matches within the top 3 compared to the total count. \"\"\" return self . percentage_rank ( self . top3 )","title":"percentage_top3"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.percentage_top5","text":"Calculate the percentage of matches within the top 5. Returns: Name Type Description float float The percentage of matches within the top 5 compared to the total count. Source code in src/pheval/analyse/rank_stats.py 93 94 95 96 97 98 99 100 def percentage_top5 ( self ) -> float : \"\"\" Calculate the percentage of matches within the top 5. Returns: float: The percentage of matches within the top 5 compared to the total count. \"\"\" return self . percentage_rank ( self . top5 )","title":"percentage_top5"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.precision_at_k","text":"Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Parameters: Name Type Description Default k int The number of top predictions to consider. required Returns: Name Type Description float float The precision at k, ranging from 0.0 to 1.0. float A higher precision indicates a better performance in identifying relevant items in the top-k predictions. 
Source code in src/pheval/analyse/rank_stats.py 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 def precision_at_k ( self , k : int ) -> float : \"\"\" Calculate the precision at k. Precision at k is the ratio of relevant items in the top-k predictions to the total number of predictions. It measures the accuracy of the top-k predictions made by a model. Args: k (int): The number of top predictions to consider. Returns: float: The precision at k, ranging from 0.0 to 1.0. A higher precision indicates a better performance in identifying relevant items in the top-k predictions. \"\"\" k_attr = getattr ( self , f \"top { k } \" ) if k > 1 else self . top return k_attr / ( self . total * k )","title":"precision_at_k"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStats.return_mean_reciprocal_rank","text":"Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: Name Type Description float float The Mean Reciprocal Rank value. Source code in src/pheval/analyse/rank_stats.py 153 154 155 156 157 158 159 160 161 162 163 164 165 166 def return_mean_reciprocal_rank ( self ) -> float : \"\"\" Retrieve or calculate the Mean Reciprocal Rank (MRR). If a pre-calculated MRR value exists (stored in the 'mrr' attribute), this method returns that value. Otherwise, it computes the Mean Reciprocal Rank using the 'mean_reciprocal_rank' method. Returns: float: The Mean Reciprocal Rank value. \"\"\" if self . mrr is not None : return self . mrr else : return self . mean_reciprocal_rank ()","title":"return_mean_reciprocal_rank"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStatsWriter","text":"Class for writing the rank stats to a file. 
Source code in src/pheval/analyse/rank_stats.py 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 class RankStatsWriter : \"\"\"Class for writing the rank stats to a file.\"\"\" def __init__ ( self , file : Path ): \"\"\" Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written \"\"\" self . file = open ( file , \"w\" ) self . writer = csv . writer ( self . file , delimiter = \" \\t \" ) self . writer . writerow ( [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , \"percentage_top\" , \"percentage_top3\" , \"percentage_top5\" , \"percentage_top10\" , \"percentage_found\" , \"precision@1\" , \"precision@3\" , \"precision@5\" , \"precision@10\" , \"MAP@1\" , \"MAP@3\" , \"MAP@5\" , \"MAP@10\" , \"f_beta_score@1\" , \"f_beta_score@3\" , \"f_beta_score@5\" , \"f_beta_score@10\" , \"NDCG@3\" , \"NDCG@5\" , \"NDCG@10\" , \"true_positives\" , \"false_positives\" , \"true_negatives\" , \"false_negatives\" , \"sensitivity\" , \"specificity\" , \"precision\" , \"negative_predictive_value\" , \"false_positive_rate\" , \"false_discovery_rate\" , \"false_negative_rate\" , \"accuracy\" , \"f1_score\" , \"matthews_correlation_coefficient\" , ] ) def write_row ( self , directory : Path , rank_stats : RankStats , binary_classification : BinaryClassificationStats , ) -> None : \"\"\" Write summary rank statistics row for a run to the file. 
Args: directory (Path): Path to the results directory corresponding to the run rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run Raises: IOError: If there is an error writing to the file. \"\"\" try : self . writer . writerow ( [ directory , rank_stats . top , rank_stats . top3 , rank_stats . top5 , rank_stats . top10 , rank_stats . found , rank_stats . total , rank_stats . mean_reciprocal_rank (), rank_stats . percentage_top (), rank_stats . percentage_top3 (), rank_stats . percentage_top5 (), rank_stats . percentage_top10 (), rank_stats . percentage_found (), rank_stats . precision_at_k ( 1 ), rank_stats . precision_at_k ( 3 ), rank_stats . precision_at_k ( 5 ), rank_stats . precision_at_k ( 10 ), rank_stats . mean_average_precision_at_k ( 1 ), rank_stats . mean_average_precision_at_k ( 3 ), rank_stats . mean_average_precision_at_k ( 5 ), rank_stats . mean_average_precision_at_k ( 10 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top (), 1 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top3 (), 3 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top5 (), 5 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top10 (), 10 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 3 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 5 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 10 ), binary_classification . true_positives , binary_classification . false_positives , binary_classification . true_negatives , binary_classification . false_negatives , binary_classification . sensitivity (), binary_classification . specificity (), binary_classification . precision (), binary_classification . negative_predictive_value (), binary_classification . false_positive_rate (), binary_classification . false_discovery_rate (), binary_classification . false_negative_rate (), binary_classification . accuracy (), binary_classification . 
f1_score (), binary_classification . matthews_correlation_coefficient (), ] ) except IOError : print ( \"Error writing \" , self . file ) def close ( self ) -> None : \"\"\" Close the file used for writing rank statistics. Raises: IOError: If there's an error while closing the file. \"\"\" try : self . file . close () except IOError : print ( \"Error closing \" , self . file )","title":"RankStatsWriter"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStatsWriter.__init__","text":"Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written Source code in src/pheval/analyse/rank_stats.py 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 def __init__ ( self , file : Path ): \"\"\" Initialise the RankStatsWriter class Args: file (Path): Path to the file where rank stats will be written \"\"\" self . file = open ( file , \"w\" ) self . writer = csv . writer ( self . file , delimiter = \" \\t \" ) self . writer . 
writerow ( [ \"results_directory_path\" , \"top\" , \"top3\" , \"top5\" , \"top10\" , \"found\" , \"total\" , \"mean_reciprocal_rank\" , \"percentage_top\" , \"percentage_top3\" , \"percentage_top5\" , \"percentage_top10\" , \"percentage_found\" , \"precision@1\" , \"precision@3\" , \"precision@5\" , \"precision@10\" , \"MAP@1\" , \"MAP@3\" , \"MAP@5\" , \"MAP@10\" , \"f_beta_score@1\" , \"f_beta_score@3\" , \"f_beta_score@5\" , \"f_beta_score@10\" , \"NDCG@3\" , \"NDCG@5\" , \"NDCG@10\" , \"true_positives\" , \"false_positives\" , \"true_negatives\" , \"false_negatives\" , \"sensitivity\" , \"specificity\" , \"precision\" , \"negative_predictive_value\" , \"false_positive_rate\" , \"false_discovery_rate\" , \"false_negative_rate\" , \"accuracy\" , \"f1_score\" , \"matthews_correlation_coefficient\" , ] )","title":"__init__"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStatsWriter.close","text":"Close the file used for writing rank statistics. Raises: Type Description IOError If there's an error while closing the file. Source code in src/pheval/analyse/rank_stats.py 404 405 406 407 408 409 410 411 412 413 414 def close ( self ) -> None : \"\"\" Close the file used for writing rank statistics. Raises: IOError: If there's an error while closing the file. \"\"\" try : self . file . close () except IOError : print ( \"Error closing \" , self . file )","title":"close"},{"location":"api/pheval/analyse/rank_stats/#src.pheval.analyse.rank_stats.RankStatsWriter.write_row","text":"Write summary rank statistics row for a run to the file. Parameters: Name Type Description Default directory Path Path to the results directory corresponding to the run required rank_stats RankStats RankStats instance containing rank statistics corresponding to the run required Raises: Type Description IOError If there is an error writing to the file. 
Source code in src/pheval/analyse/rank_stats.py 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 def write_row ( self , directory : Path , rank_stats : RankStats , binary_classification : BinaryClassificationStats , ) -> None : \"\"\" Write summary rank statistics row for a run to the file. Args: directory (Path): Path to the results directory corresponding to the run rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run Raises: IOError: If there is an error writing to the file. \"\"\" try : self . writer . writerow ( [ directory , rank_stats . top , rank_stats . top3 , rank_stats . top5 , rank_stats . top10 , rank_stats . found , rank_stats . total , rank_stats . mean_reciprocal_rank (), rank_stats . percentage_top (), rank_stats . percentage_top3 (), rank_stats . percentage_top5 (), rank_stats . percentage_top10 (), rank_stats . percentage_found (), rank_stats . precision_at_k ( 1 ), rank_stats . precision_at_k ( 3 ), rank_stats . precision_at_k ( 5 ), rank_stats . precision_at_k ( 10 ), rank_stats . mean_average_precision_at_k ( 1 ), rank_stats . mean_average_precision_at_k ( 3 ), rank_stats . mean_average_precision_at_k ( 5 ), rank_stats . mean_average_precision_at_k ( 10 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top (), 1 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top3 (), 3 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top5 (), 5 ), rank_stats . f_beta_score_at_k ( rank_stats . percentage_top10 (), 10 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 3 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 5 ), rank_stats . mean_normalised_discounted_cumulative_gain ( 10 ), binary_classification . true_positives , binary_classification . 
false_positives , binary_classification . true_negatives , binary_classification . false_negatives , binary_classification . sensitivity (), binary_classification . specificity (), binary_classification . precision (), binary_classification . negative_predictive_value (), binary_classification . false_positive_rate (), binary_classification . false_discovery_rate (), binary_classification . false_negative_rate (), binary_classification . accuracy (), binary_classification . f1_score (), binary_classification . matthews_correlation_coefficient (), ] ) except IOError : print ( \"Error writing \" , self . file )","title":"write_row"},{"location":"api/pheval/analyse/run_data_parser/","text":"TrackInputOutputDirectories dataclass Track the input phenopacket test data for a corresponding pheval output directory. Attributes: Name Type Description phenopacket_dir Path The directory containing input phenopackets. results_dir Path The directory containing output results from pheval. Source code in src/pheval/analyse/run_data_parser.py 8 9 10 11 12 13 14 15 16 17 18 19 @dataclass class TrackInputOutputDirectories : \"\"\" Track the input phenopacket test data for a corresponding pheval output directory. Attributes: phenopacket_dir (Path): The directory containing input phenopackets. results_dir (Path): The directory containing output results from pheval. \"\"\" phenopacket_dir : Path results_dir : Path parse_run_data_text_file ( run_data_path ) Parse run data .txt file returning a list of input phenopacket and corresponding output directories. Parameters: Name Type Description Default run_data_path Path The path to the run data .txt file. required Returns: Type Description List [ TrackInputOutputDirectories ] List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing List [ TrackInputOutputDirectories ] input test data directories and their corresponding output directories. 
Notes The run data .txt file should be formatted with tab-separated values. Each row should contain two columns: the first column representing the input test data phenopacket directory, and the second column representing the corresponding run output directory. Source code in src/pheval/analyse/run_data_parser.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 def parse_run_data_text_file ( run_data_path : Path ) -> List [ TrackInputOutputDirectories ]: \"\"\" Parse run data .txt file returning a list of input phenopacket and corresponding output directories. Args: run_data_path (Path): The path to the run data .txt file. Returns: List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing input test data directories and their corresponding output directories. Notes: The run data .txt file should be formatted with tab-separated values. Each row should contain two columns: the first column representing the input test data phenopacket directory, and the second column representing the corresponding run output directory. \"\"\" run_data = pd . read_csv ( run_data_path , delimiter = \" \\t \" , header = None ) run_data_list = [] for _index , row in run_data . iterrows (): run_data_list . append ( TrackInputOutputDirectories ( phenopacket_dir = Path ( row [ 0 ]), results_dir = Path ( row [ 1 ])) ) return run_data_list","title":"Run data parser"},{"location":"api/pheval/analyse/run_data_parser/#src.pheval.analyse.run_data_parser.TrackInputOutputDirectories","text":"Track the input phenopacket test data for a corresponding pheval output directory. Attributes: Name Type Description phenopacket_dir Path The directory containing input phenopackets. results_dir Path The directory containing output results from pheval. 
Source code in src/pheval/analyse/run_data_parser.py 8 9 10 11 12 13 14 15 16 17 18 19 @dataclass class TrackInputOutputDirectories : \"\"\" Track the input phenopacket test data for a corresponding pheval output directory. Attributes: phenopacket_dir (Path): The directory containing input phenopackets. results_dir (Path): The directory containing output results from pheval. \"\"\" phenopacket_dir : Path results_dir : Path","title":"TrackInputOutputDirectories"},{"location":"api/pheval/analyse/run_data_parser/#src.pheval.analyse.run_data_parser.parse_run_data_text_file","text":"Parse run data .txt file returning a list of input phenopacket and corresponding output directories. Parameters: Name Type Description Default run_data_path Path The path to the run data .txt file. required Returns: Type Description List [ TrackInputOutputDirectories ] List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing List [ TrackInputOutputDirectories ] input test data directories and their corresponding output directories. Notes The run data .txt file should be formatted with tab-separated values. Each row should contain two columns: the first column representing the input test data phenopacket directory, and the second column representing the corresponding run output directory. Source code in src/pheval/analyse/run_data_parser.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 def parse_run_data_text_file ( run_data_path : Path ) -> List [ TrackInputOutputDirectories ]: \"\"\" Parse run data .txt file returning a list of input phenopacket and corresponding output directories. Args: run_data_path (Path): The path to the run data .txt file. Returns: List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing input test data directories and their corresponding output directories. Notes: The run data .txt file should be formatted with tab-separated values. 
Each row should contain two columns: the first column representing the input test data phenopacket directory, and the second column representing the corresponding run output directory. \"\"\" run_data = pd . read_csv ( run_data_path , delimiter = \" \\t \" , header = None ) run_data_list = [] for _index , row in run_data . iterrows (): run_data_list . append ( TrackInputOutputDirectories ( phenopacket_dir = Path ( row [ 0 ]), results_dir = Path ( row [ 1 ])) ) return run_data_list","title":"parse_run_data_text_file"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/","text":"AssessVariantPrioritisation Class for assessing variant prioritisation based on thresholds and scoring orders. Source code in src/pheval/analyse/variant_prioritisation_analysis.py 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 class AssessVariantPrioritisation : \"\"\"Class for assessing variant prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_variant_results : List [ RankedPhEvalVariantResult ], threshold : float , score_order : str , proband_causative_variants : List [ GenomicVariant ], ): \"\"\" Initialise AssessVariantPrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_variant_results (List[RankedPhEvalVariantResult]): List of ranked 
PhEval variant results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_variants (List[GenomicVariant]): List of proband variants \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_variant_results = standardised_variant_results self . threshold = threshold self . score_order = score_order self . proband_causative_variants = proband_causative_variants def _record_variant_prioritisation_match ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats , ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if found within the results Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return VariantPrioritisationResult ( self . phenopacket_path , GenomicVariant ( chrom = result_entry . chromosome , pos = result_entry . start , ref = result_entry . ref , alt = result_entry . alt , ), rank , ) def _assess_variant_with_threshold_ascending_order ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if it meets the ascending order threshold. This method checks if the variant prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the variant rank. Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . 
_record_variant_prioritisation_match ( result_entry , rank_stats ) def _assess_variant_with_threshold ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if it meets the score threshold. This method checks if the variant prioritisation rank meets the score threshold. If the score of the result entry is greater than the threshold, it records the variant rank. Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_variant_prioritisation_match ( result_entry , rank_stats ) def _record_matched_variant ( self , rank_stats : RankStats , standardised_variant_result : RankedPhEvalVariantResult ) -> VariantPrioritisationResult : \"\"\" Return the variant rank result - handling the specification of a threshold. This method determines and returns the variant rank result based on the specified threshold and score order. If the threshold is 0.0, it records the variant rank directly. Otherwise, it assesses the variant with the threshold based on the score order. Args: rank_stats (RankStats): RankStats class instance standardised_variant_result (RankedPhEvalVariantResult): Ranked PhEval variant result entry Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_variant_prioritisation_match ( standardised_variant_result , rank_stats ) else : return ( self . _assess_variant_with_threshold ( standardised_variant_result , rank_stats ) if self . score_order != \"ascending\" else self . 
_assess_variant_with_threshold_ascending_order ( standardised_variant_result , rank_stats ) ) def assess_variant_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for variant in self . proband_causative_variants : rank_stats . total += 1 variant_match = VariantPrioritisationResult ( self . phenopacket_path , variant ) for result in self . standardised_variant_results : result_variant = GenomicVariant ( chrom = str ( result . chromosome ), pos = int ( result . start ), ref = result . ref , alt = result . alt , ) if variant == result_variant : variant_match = self . _record_matched_variant ( rank_stats , result ) ( relevant_ranks . append ( variant_match . rank ) if variant_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( VariantPrioritisationResult ( self . phenopacket_path , variant ) if variant_match is None else variant_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . 
standardised_variant_results , relevant_ranks ) __init__ ( phenopacket_path , results_dir , standardised_variant_results , threshold , score_order , proband_causative_variants ) Initialise AssessVariantPrioritisation class Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_variant_results List [ RankedPhEvalVariantResult ] List of ranked PhEval variant results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_causative_variants List [ GenomicVariant ] List of proband variants required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_variant_results : List [ RankedPhEvalVariantResult ], threshold : float , score_order : str , proband_causative_variants : List [ GenomicVariant ], ): \"\"\" Initialise AssessVariantPrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_variant_results (List[RankedPhEvalVariantResult]): List of ranked PhEval variant results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_variants (List[GenomicVariant]): List of proband variants \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_variant_results = standardised_variant_results self . threshold = threshold self . score_order = score_order self . proband_causative_variants = proband_causative_variants assess_variant_prioritisation ( rank_stats , rank_records , binary_classification_stats ) Assess variant prioritisation. 
This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 def assess_variant_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for variant in self . proband_causative_variants : rank_stats . total += 1 variant_match = VariantPrioritisationResult ( self . phenopacket_path , variant ) for result in self . standardised_variant_results : result_variant = GenomicVariant ( chrom = str ( result . chromosome ), pos = int ( result . start ), ref = result . ref , alt = result . alt , ) if variant == result_variant : variant_match = self . _record_matched_variant ( rank_stats , result ) ( relevant_ranks . append ( variant_match . rank ) if variant_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( VariantPrioritisationResult ( self . 
phenopacket_path , variant ) if variant_match is None else variant_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . standardised_variant_results , relevant_ranks ) assess_phenopacket_variant_prioritisation ( phenopacket_path , score_order , results_dir_and_input , threshold , variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats ) Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required variant_rank_stats RankStats RankStats class instance. required variant_rank_comparison defaultdict Default dictionary for variant rank comparisons. required variant_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 def assess_phenopacket_variant_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , variant_rank_stats : RankStats , variant_rank_comparison : defaultdict , variant_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket. 
Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. variant_rank_stats (RankStats): RankStats class instance. variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons. variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" proband_causative_variants = _obtain_causative_variants ( phenopacket_path ) standardised_variant_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_variant_results/ { phenopacket_path . stem } -pheval_variant_result.tsv\" ) pheval_variant_result = read_standardised_result ( standardised_variant_result ) AssessVariantPrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_variant_results/\" ), parse_pheval_result ( RankedPhEvalVariantResult , pheval_variant_result ), threshold , score_order , proband_causative_variants , ) . assess_variant_prioritisation ( variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats ) benchmark_variant_prioritisation ( results_directory_and_input , score_order , threshold , variant_rank_comparison ) Benchmark a directory based on variant prioritisation results. Parameters: Name Type Description Default results_directory_and_input TrackInputOutputDirectories Input and output directories. required score_order str The order in which scores are arranged. required threshold float Threshold for assessment. required variant_rank_comparison defaultdict Default dictionary for variant rank comparisons. required Returns: Name Type Description BenchmarkRunResults An object containing benchmarking results for variant prioritisation, including ranks and rank statistics for the benchmarked directory. 
Source code in src/pheval/analyse/variant_prioritisation_analysis.py 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 def benchmark_variant_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , variant_rank_comparison : defaultdict , ): \"\"\" Benchmark a directory based on variant prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for variant prioritisation, including ranks and rank statistics for the benchmarked directory. \"\"\" variant_rank_stats = RankStats () variant_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_variant_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = variant_rank_comparison , rank_stats = variant_rank_stats , binary_classification_stats = variant_binary_classification_stats , )","title":"Variant prioritisation analysis"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation","text":"Class for assessing variant prioritisation based on thresholds and scoring orders. 
Source code in src/pheval/analyse/variant_prioritisation_analysis.py 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 class AssessVariantPrioritisation : \"\"\"Class for assessing variant prioritisation based on thresholds and scoring orders.\"\"\" def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_variant_results : List [ RankedPhEvalVariantResult ], threshold : float , score_order : str , proband_causative_variants : List [ GenomicVariant ], ): \"\"\" Initialise AssessVariantPrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_variant_results (List[RankedPhEvalVariantResult]): List of ranked PhEval variant results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_variants (List[GenomicVariant]): List of proband variants \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_variant_results = standardised_variant_results self . threshold = threshold self . score_order = score_order self . 
proband_causative_variants = proband_causative_variants def _record_variant_prioritisation_match ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats , ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if found within the results Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" rank = result_entry . rank rank_stats . add_rank ( rank ) return VariantPrioritisationResult ( self . phenopacket_path , GenomicVariant ( chrom = result_entry . chromosome , pos = result_entry . start , ref = result_entry . ref , alt = result_entry . alt , ), rank , ) def _assess_variant_with_threshold_ascending_order ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if it meets the ascending order threshold. This method checks if the variant prioritisation rank meets the ascending order threshold. If the score of the result entry is less than the threshold, it records the variant rank. Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) > float ( result_entry . score ): return self . _record_variant_prioritisation_match ( result_entry , rank_stats ) def _assess_variant_with_threshold ( self , result_entry : RankedPhEvalVariantResult , rank_stats : RankStats ) -> VariantPrioritisationResult : \"\"\" Record the variant prioritisation rank if it meets the score threshold. This method checks if the variant prioritisation rank meets the score threshold. If the score of the result entry is greater than the threshold, it records the variant rank. 
Args: result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry rank_stats (RankStats): RankStats class instance Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) < float ( result_entry . score ): return self . _record_variant_prioritisation_match ( result_entry , rank_stats ) def _record_matched_variant ( self , rank_stats : RankStats , standardised_variant_result : RankedPhEvalVariantResult ) -> VariantPrioritisationResult : \"\"\" Return the variant rank result - handling the specification of a threshold. This method determines and returns the variant rank result based on the specified threshold and score order. If the threshold is 0.0, it records the variant rank directly. Otherwise, it assesses the variant with the threshold based on the score order. Args: rank_stats (RankStats): RankStats class instance standardised_variant_result (RankedPhEvalVariantResult): Ranked PhEval variant result entry Returns: VariantPrioritisationResult: Recorded correct variant prioritisation rank result \"\"\" if float ( self . threshold ) == 0.0 : return self . _record_variant_prioritisation_match ( standardised_variant_result , rank_stats ) else : return ( self . _assess_variant_with_threshold ( standardised_variant_result , rank_stats ) if self . score_order != \"ascending\" else self . _assess_variant_with_threshold_ascending_order ( standardised_variant_result , rank_stats ) ) def assess_variant_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. 
binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for variant in self . proband_causative_variants : rank_stats . total += 1 variant_match = VariantPrioritisationResult ( self . phenopacket_path , variant ) for result in self . standardised_variant_results : result_variant = GenomicVariant ( chrom = str ( result . chromosome ), pos = int ( result . start ), ref = result . ref , alt = result . alt , ) if variant == result_variant : variant_match = self . _record_matched_variant ( rank_stats , result ) ( relevant_ranks . append ( variant_match . rank ) if variant_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( VariantPrioritisationResult ( self . phenopacket_path , variant ) if variant_match is None else variant_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . 
standardised_variant_results , relevant_ranks )","title":"AssessVariantPrioritisation"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.__init__","text":"Initialise AssessVariantPrioritisation class Parameters: Name Type Description Default phenopacket_path Path Path to the phenopacket file required results_dir Path Path to the results directory required standardised_variant_results List [ RankedPhEvalVariantResult ] List of ranked PhEval variant results required threshold float Threshold for scores required score_order str Score order for results, either ascending or descending required proband_causative_variants List [ GenomicVariant ] List of proband variants required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 def __init__ ( self , phenopacket_path : Path , results_dir : Path , standardised_variant_results : List [ RankedPhEvalVariantResult ], threshold : float , score_order : str , proband_causative_variants : List [ GenomicVariant ], ): \"\"\" Initialise AssessVariantPrioritisation class Args: phenopacket_path (Path): Path to the phenopacket file results_dir (Path): Path to the results directory standardised_variant_results (List[RankedPhEvalVariantResult]): List of ranked PhEval variant results threshold (float): Threshold for scores score_order (str): Score order for results, either ascending or descending proband_causative_variants (List[GenomicVariant]): List of proband variants \"\"\" self . phenopacket_path = phenopacket_path self . results_dir = results_dir self . standardised_variant_results = standardised_variant_results self . threshold = threshold self . score_order = score_order self . 
proband_causative_variants = proband_causative_variants","title":"__init__"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.AssessVariantPrioritisation.assess_variant_prioritisation","text":"Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Parameters: Name Type Description Default rank_stats RankStats RankStats class instance required rank_records defaultdict A defaultdict to store the correct ranked results. required binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 def assess_variant_prioritisation ( self , rank_stats : RankStats , rank_records : defaultdict , binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation. This method assesses the prioritisation of variants based on the provided criteria and records ranks using a PrioritisationRankRecorder. Args: rank_stats (RankStats): RankStats class instance rank_records (defaultdict): A defaultdict to store the correct ranked results. binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" relevant_ranks = [] for variant in self . proband_causative_variants : rank_stats . total += 1 variant_match = VariantPrioritisationResult ( self . phenopacket_path , variant ) for result in self . standardised_variant_results : result_variant = GenomicVariant ( chrom = str ( result . chromosome ), pos = int ( result . start ), ref = result . ref , alt = result . alt , ) if variant == result_variant : variant_match = self . 
_record_matched_variant ( rank_stats , result ) ( relevant_ranks . append ( variant_match . rank ) if variant_match else relevant_ranks . append ( 0 ) ) break PrioritisationRankRecorder ( rank_stats . total , self . results_dir , ( VariantPrioritisationResult ( self . phenopacket_path , variant ) if variant_match is None else variant_match ), rank_records , ) . record_rank () rank_stats . relevant_result_ranks . append ( relevant_ranks ) binary_classification_stats . add_classification ( self . standardised_variant_results , relevant_ranks )","title":"assess_variant_prioritisation"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.assess_phenopacket_variant_prioritisation","text":"Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path Path to the Phenopacket. required score_order str The order in which scores are arranged, either ascending or descending. required results_dir_and_input TrackInputOutputDirectories Input and output directories. required threshold float Threshold for assessment. required variant_rank_stats RankStats RankStats class instance. required variant_rank_comparison defaultdict Default dictionary for variant rank comparisons. required variant_binary_classification_stats BinaryClassificationStats BinaryClassificationStats class instance. 
required Source code in src/pheval/analyse/variant_prioritisation_analysis.py 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 def assess_phenopacket_variant_prioritisation ( phenopacket_path : Path , score_order : str , results_dir_and_input : TrackInputOutputDirectories , threshold : float , variant_rank_stats : RankStats , variant_rank_comparison : defaultdict , variant_binary_classification_stats : BinaryClassificationStats , ) -> None : \"\"\" Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results against the recorded causative variants for a proband in the Phenopacket. Args: phenopacket_path (Path): Path to the Phenopacket. score_order (str): The order in which scores are arranged, either ascending or descending. results_dir_and_input (TrackInputOutputDirectories): Input and output directories. threshold (float): Threshold for assessment. variant_rank_stats (RankStats): RankStats class instance. variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons. variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance. \"\"\" proband_causative_variants = _obtain_causative_variants ( phenopacket_path ) standardised_variant_result = results_dir_and_input . results_dir . joinpath ( f \"pheval_variant_results/ { phenopacket_path . stem } -pheval_variant_result.tsv\" ) pheval_variant_result = read_standardised_result ( standardised_variant_result ) AssessVariantPrioritisation ( phenopacket_path , results_dir_and_input . results_dir . joinpath ( \"pheval_variant_results/\" ), parse_pheval_result ( RankedPhEvalVariantResult , pheval_variant_result ), threshold , score_order , proband_causative_variants , ) . 
assess_variant_prioritisation ( variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats )","title":"assess_phenopacket_variant_prioritisation"},{"location":"api/pheval/analyse/variant_prioritisation_analysis/#src.pheval.analyse.variant_prioritisation_analysis.benchmark_variant_prioritisation","text":"Benchmark a directory based on variant prioritisation results. Parameters: Name Type Description Default results_directory_and_input TrackInputOutputDirectories Input and output directories. required score_order str The order in which scores are arranged. required threshold float Threshold for assessment. required variant_rank_comparison defaultdict Default dictionary for variant rank comparisons. required Returns: Name Type Description BenchmarkRunResults An object containing benchmarking results for variant prioritisation, including ranks and rank statistics for the benchmarked directory. Source code in src/pheval/analyse/variant_prioritisation_analysis.py 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 def benchmark_variant_prioritisation ( results_directory_and_input : TrackInputOutputDirectories , score_order : str , threshold : float , variant_rank_comparison : defaultdict , ): \"\"\" Benchmark a directory based on variant prioritisation results. Args: results_directory_and_input (TrackInputOutputDirectories): Input and output directories. score_order (str): The order in which scores are arranged. threshold (float): Threshold for assessment. variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons. Returns: BenchmarkRunResults: An object containing benchmarking results for variant prioritisation, including ranks and rank statistics for the benchmarked directory. 
\"\"\" variant_rank_stats = RankStats () variant_binary_classification_stats = BinaryClassificationStats () for phenopacket_path in all_files ( results_directory_and_input . phenopacket_dir ): assess_phenopacket_variant_prioritisation ( phenopacket_path , score_order , results_directory_and_input , threshold , variant_rank_stats , variant_rank_comparison , variant_binary_classification_stats , ) return BenchmarkRunResults ( results_dir = results_directory_and_input . results_dir , ranks = variant_rank_comparison , rank_stats = variant_rank_stats , binary_classification_stats = variant_binary_classification_stats , )","title":"benchmark_variant_prioritisation"},{"location":"api/pheval/infra/exomiserdb/","text":"DBConnection Source code in src/pheval/infra/exomiserdb.py 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 class DBConnection : connection = None def __init__ ( self , connection ): DBConnection . connection = connection @classmethod def get_connection ( cls ) -> jaydebeapi . Connection : \"\"\"Creates return new Singleton database connection\"\"\" return DBConnection . connection def close ( self ): return self . connection . close () @classmethod def get_cursor ( cls ) -> jaydebeapi . Cursor : connection = cls . get_connection () return connection . cursor () get_connection () classmethod Creates return new Singleton database connection Source code in src/pheval/infra/exomiserdb.py 49 50 51 52 @classmethod def get_connection ( cls ) -> jaydebeapi . Connection : \"\"\"Creates return new Singleton database connection\"\"\" return DBConnection . connection DBConnector Source code in src/pheval/infra/exomiserdb.py 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 class DBConnector : def __init__ ( self , jar : Path , driver : str , server : str , database : str , user : str , password : str ): self . jar = jar self . driver = driver self . server = server self . database = database self . user = user self . 
password = password self . dbconn = None def create_connection ( self ) -> jaydebeapi . Connection : \"\"\"creates h2 database connection\"\"\" return jaydebeapi . connect ( self . driver , f \" { self . server }{ self . database } \" , [ self . user , self . password ], self . jar , ) def __enter__ ( self ) -> jaydebeapi . Connection : self . dbconn = self . create_connection () return self . dbconn def __exit__ ( self , * other ): self . dbconn . close () create_connection () creates h2 database connection Source code in src/pheval/infra/exomiserdb.py 26 27 28 29 30 31 32 33 def create_connection ( self ) -> jaydebeapi . Connection : \"\"\"creates h2 database connection\"\"\" return jaydebeapi . connect ( self . driver , f \" { self . server }{ self . database } \" , [ self . user , self . password ], self . jar , ) ExomiserDB Source code in src/pheval/infra/exomiserdb.py 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 class ExomiserDB : def __init__ ( self , db_path : Path ): try : self . connector = DBConnector ( # noqa jar = os . path . join ( os . path . dirname ( __file__ ), \"../../../lib/h2-1.4.199.jar\" ), driver = \"org.h2.Driver\" , server = f \"jdbc:h2: { db_path } \" , user = \"sa\" , password = \"\" , database = \"\" , ) except Exception as e : print ( \"An exception occurred\" , e ) def import_from_semsim_file ( self , input_file : Path , subject_prefix : str , object_prefix : str ): \"\"\"imports semsim tsv profile into exomiser phenotype database Args: input_file (Path): semsim profile subject_prefix (str): Subject Prefix. e.g HP object_prefix (str): Object Prefix. e.g MP \"\"\" with self . connector as cnn : conn = DBConnection ( cnn ) reader = pl . read_csv_batched ( input_file , separator = \" \\t \" ) batch_length = 5 batches = reader . next_batches ( batch_length ) cursor = conn . 
get_cursor () # # TODO: Refactor this with open ( input_file , \"r\" ) as f : total = sum ( 1 for line in f ) pbar = tqdm ( total = total - 1 ) mapping_id = 1 while batches : input_data = pl . concat ( batches ) sql = _semsim2h2 ( input_data , object_prefix , subject_prefix , mapping_id = mapping_id ) cursor . execute ( sql ) len_input_data = len ( input_data ) mapping_id += len_input_data pbar . update ( len_input_data ) batches = reader . next_batches ( batch_length ) import_from_semsim_file ( input_file , subject_prefix , object_prefix ) imports semsim tsv profile into exomiser phenotype database Parameters: Name Type Description Default input_file Path semsim profile required subject_prefix str Subject Prefix. e.g HP required object_prefix str Object Prefix. e.g MP required Source code in src/pheval/infra/exomiserdb.py 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 def import_from_semsim_file ( self , input_file : Path , subject_prefix : str , object_prefix : str ): \"\"\"imports semsim tsv profile into exomiser phenotype database Args: input_file (Path): semsim profile subject_prefix (str): Subject Prefix. e.g HP object_prefix (str): Object Prefix. e.g MP \"\"\" with self . connector as cnn : conn = DBConnection ( cnn ) reader = pl . read_csv_batched ( input_file , separator = \" \\t \" ) batch_length = 5 batches = reader . next_batches ( batch_length ) cursor = conn . get_cursor () # # TODO: Refactor this with open ( input_file , \"r\" ) as f : total = sum ( 1 for line in f ) pbar = tqdm ( total = total - 1 ) mapping_id = 1 while batches : input_data = pl . concat ( batches ) sql = _semsim2h2 ( input_data , object_prefix , subject_prefix , mapping_id = mapping_id ) cursor . execute ( sql ) len_input_data = len ( input_data ) mapping_id += len_input_data pbar . update ( len_input_data ) batches = reader . 
next_batches ( batch_length )","title":"Exomiserdb"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.DBConnection","text":"Source code in src/pheval/infra/exomiserdb.py 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 class DBConnection : connection = None def __init__ ( self , connection ): DBConnection . connection = connection @classmethod def get_connection ( cls ) -> jaydebeapi . Connection : \"\"\"Creates return new Singleton database connection\"\"\" return DBConnection . connection def close ( self ): return self . connection . close () @classmethod def get_cursor ( cls ) -> jaydebeapi . Cursor : connection = cls . get_connection () return connection . cursor ()","title":"DBConnection"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.DBConnection.get_connection","text":"Creates return new Singleton database connection Source code in src/pheval/infra/exomiserdb.py 49 50 51 52 @classmethod def get_connection ( cls ) -> jaydebeapi . Connection : \"\"\"Creates return new Singleton database connection\"\"\" return DBConnection . connection","title":"get_connection"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.DBConnector","text":"Source code in src/pheval/infra/exomiserdb.py 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 class DBConnector : def __init__ ( self , jar : Path , driver : str , server : str , database : str , user : str , password : str ): self . jar = jar self . driver = driver self . server = server self . database = database self . user = user self . password = password self . dbconn = None def create_connection ( self ) -> jaydebeapi . Connection : \"\"\"creates h2 database connection\"\"\" return jaydebeapi . connect ( self . driver , f \" { self . server }{ self . database } \" , [ self . user , self . password ], self . jar , ) def __enter__ ( self ) -> jaydebeapi . Connection : self . dbconn = self . create_connection () return self . 
dbconn def __exit__ ( self , * other ): self . dbconn . close ()","title":"DBConnector"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.DBConnector.create_connection","text":"creates h2 database connection Source code in src/pheval/infra/exomiserdb.py 26 27 28 29 30 31 32 33 def create_connection ( self ) -> jaydebeapi . Connection : \"\"\"creates h2 database connection\"\"\" return jaydebeapi . connect ( self . driver , f \" { self . server }{ self . database } \" , [ self . user , self . password ], self . jar , )","title":"create_connection"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.ExomiserDB","text":"Source code in src/pheval/infra/exomiserdb.py 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 class ExomiserDB : def __init__ ( self , db_path : Path ): try : self . connector = DBConnector ( # noqa jar = os . path . join ( os . path . dirname ( __file__ ), \"../../../lib/h2-1.4.199.jar\" ), driver = \"org.h2.Driver\" , server = f \"jdbc:h2: { db_path } \" , user = \"sa\" , password = \"\" , database = \"\" , ) except Exception as e : print ( \"An exception occurred\" , e ) def import_from_semsim_file ( self , input_file : Path , subject_prefix : str , object_prefix : str ): \"\"\"imports semsim tsv profile into exomiser phenotype database Args: input_file (Path): semsim profile subject_prefix (str): Subject Prefix. e.g HP object_prefix (str): Object Prefix. e.g MP \"\"\" with self . connector as cnn : conn = DBConnection ( cnn ) reader = pl . read_csv_batched ( input_file , separator = \" \\t \" ) batch_length = 5 batches = reader . next_batches ( batch_length ) cursor = conn . get_cursor () # # TODO: Refactor this with open ( input_file , \"r\" ) as f : total = sum ( 1 for line in f ) pbar = tqdm ( total = total - 1 ) mapping_id = 1 while batches : input_data = pl . 
concat ( batches ) sql = _semsim2h2 ( input_data , object_prefix , subject_prefix , mapping_id = mapping_id ) cursor . execute ( sql ) len_input_data = len ( input_data ) mapping_id += len_input_data pbar . update ( len_input_data ) batches = reader . next_batches ( batch_length )","title":"ExomiserDB"},{"location":"api/pheval/infra/exomiserdb/#src.pheval.infra.exomiserdb.ExomiserDB.import_from_semsim_file","text":"imports semsim tsv profile into exomiser phenotype database Parameters: Name Type Description Default input_file Path semsim profile required subject_prefix str Subject Prefix. e.g HP required object_prefix str Object Prefix. e.g MP required Source code in src/pheval/infra/exomiserdb.py 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 def import_from_semsim_file ( self , input_file : Path , subject_prefix : str , object_prefix : str ): \"\"\"imports semsim tsv profile into exomiser phenotype database Args: input_file (Path): semsim profile subject_prefix (str): Subject Prefix. e.g HP object_prefix (str): Object Prefix. e.g MP \"\"\" with self . connector as cnn : conn = DBConnection ( cnn ) reader = pl . read_csv_batched ( input_file , separator = \" \\t \" ) batch_length = 5 batches = reader . next_batches ( batch_length ) cursor = conn . get_cursor () # # TODO: Refactor this with open ( input_file , \"r\" ) as f : total = sum ( 1 for line in f ) pbar = tqdm ( total = total - 1 ) mapping_id = 1 while batches : input_data = pl . concat ( batches ) sql = _semsim2h2 ( input_data , object_prefix , subject_prefix , mapping_id = mapping_id ) cursor . execute ( sql ) len_input_data = len ( input_data ) mapping_id += len_input_data pbar . update ( len_input_data ) batches = reader . 
next_batches ( batch_length )","title":"import_from_semsim_file"},{"location":"api/pheval/post_processing/post_processing/","text":"PhEvalDiseaseResult dataclass Bases: PhEvalResult Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 @dataclass class PhEvalDiseaseResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str score : float PhEvalGeneResult dataclass Bases: PhEvalResult Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/post_processing/post_processing.py 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 @dataclass class PhEvalGeneResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" gene_symbol : Union [ List [ str ], str ] gene_identifier : Union [ List [ str ], str ] score : float PhEvalResult dataclass Base class for PhEval results. Source code in src/pheval/post_processing/post_processing.py 25 26 27 @dataclass class PhEvalResult : \"\"\"Base class for PhEval results.\"\"\" PhEvalVariantResult dataclass Bases: PhEvalResult Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/post_processing/post_processing.py 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 @dataclass class PhEvalVariantResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" chromosome : str start : int end : int ref : str alt : str score : float RankedPhEvalDiseaseResult dataclass Bases: PhEvalDiseaseResult PhEval disease result with corresponding rank Args: rank (int): The rank for the result entry Source code in src/pheval/post_processing/post_processing.py 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 @dataclass class RankedPhEvalDiseaseResult ( PhEvalDiseaseResult ): \"\"\"PhEval disease result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_disease_result ( pheval_disease_result : PhEvalDiseaseResult , rank : int ): \"\"\"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalDiseaseResult: The result as a RankedPhEvalDiseaseResult \"\"\" return RankedPhEvalDiseaseResult ( 
disease_name = pheval_disease_result . disease_name , disease_identifier = pheval_disease_result . disease_identifier , score = pheval_disease_result . score , rank = rank , ) from_disease_result ( pheval_disease_result , rank ) staticmethod Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank (int): The corresponding rank for the result entry Returns: Name Type Description RankedPhEvalDiseaseResult The result as a RankedPhEvalDiseaseResult Source code in src/pheval/post_processing/post_processing.py 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 @staticmethod def from_disease_result ( pheval_disease_result : PhEvalDiseaseResult , rank : int ): \"\"\"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalDiseaseResult: The result as a RankedPhEvalDiseaseResult \"\"\" return RankedPhEvalDiseaseResult ( disease_name = pheval_disease_result . disease_name , disease_identifier = pheval_disease_result . disease_identifier , score = pheval_disease_result . 
score , rank = rank , ) RankedPhEvalGeneResult dataclass Bases: PhEvalGeneResult PhEval gene result with corresponding rank Args: rank (int): The rank for the result entry Source code in src/pheval/post_processing/post_processing.py 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 @dataclass class RankedPhEvalGeneResult ( PhEvalGeneResult ): \"\"\"PhEval gene result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_gene_result ( pheval_gene_result : PhEvalGeneResult , rank : int ): \"\"\"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalGeneResult: The result as a RankedPhEvalGeneResult \"\"\" return RankedPhEvalGeneResult ( gene_symbol = pheval_gene_result . gene_symbol , gene_identifier = pheval_gene_result . gene_identifier , score = pheval_gene_result . score , rank = rank , ) from_gene_result ( pheval_gene_result , rank ) staticmethod Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: Name Type Description RankedPhEvalGeneResult The result as a RankedPhEvalGeneResult Source code in src/pheval/post_processing/post_processing.py 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 @staticmethod def from_gene_result ( pheval_gene_result : PhEvalGeneResult , rank : int ): \"\"\"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalGeneResult: The result as a RankedPhEvalGeneResult \"\"\" return RankedPhEvalGeneResult ( gene_symbol = pheval_gene_result . gene_symbol , gene_identifier = pheval_gene_result . 
gene_identifier , score = pheval_gene_result . score , rank = rank , ) RankedPhEvalVariantResult dataclass Bases: PhEvalVariantResult PhEval variant result with corresponding rank Args: rank (int): The rank for the result entry Source code in src/pheval/post_processing/post_processing.py 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 @dataclass class RankedPhEvalVariantResult ( PhEvalVariantResult ): \"\"\"PhEval variant result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_variant_result ( pheval_variant_result : PhEvalVariantResult , rank : int ): \"\"\"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalVariantResult: The result as a RankedPhEvalVariantResult \"\"\" return RankedPhEvalVariantResult ( chromosome = pheval_variant_result . chromosome , start = pheval_variant_result . start , end = pheval_variant_result . end , ref = pheval_variant_result . ref , alt = pheval_variant_result . alt , score = pheval_variant_result . 
score , rank = rank , ) from_variant_result ( pheval_variant_result , rank ) staticmethod Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: Name Type Description RankedPhEvalVariantResult The result as a RankedPhEvalVariantResult Source code in src/pheval/post_processing/post_processing.py 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 @staticmethod def from_variant_result ( pheval_variant_result : PhEvalVariantResult , rank : int ): \"\"\"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalVariantResult: The result as a RankedPhEvalVariantResult \"\"\" return RankedPhEvalVariantResult ( chromosome = pheval_variant_result . chromosome , start = pheval_variant_result . start , end = pheval_variant_result . end , ref = pheval_variant_result . ref , alt = pheval_variant_result . alt , score = pheval_variant_result . score , rank = rank , ) ResultSorter Class for sorting PhEvalResult instances based on a given sort order. Source code in src/pheval/post_processing/post_processing.py 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 class ResultSorter : \"\"\"Class for sorting PhEvalResult instances based on a given sort order.\"\"\" def __init__ ( self , pheval_results : [ PhEvalResult ], sort_order : SortOrder ): \"\"\" Initialise ResultSorter Args: pheval_results ([PhEvalResult]): List of PhEvalResult instances to be sorted sort_order (SortOrder): Sorting order to be applied \"\"\" self . pheval_results = pheval_results self . 
sort_order = sort_order def _sort_by_decreasing_score ( self ) -> [ PhEvalResult ]: \"\"\" Sort results in descending order based on the score Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return sorted ( self . pheval_results , key = operator . attrgetter ( \"score\" ), reverse = True ) def _sort_by_increasing_score ( self ) -> [ PhEvalResult ]: \"\"\" Sort results in ascending order based on the score Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return sorted ( self . pheval_results , key = operator . attrgetter ( \"score\" ), reverse = False ) def sort_pheval_results ( self ) -> [ PhEvalResult ]: \"\"\" Sort results based on the specified sort order. Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return ( self . _sort_by_increasing_score () if self . sort_order == SortOrder . ASCENDING else self . _sort_by_decreasing_score () ) __init__ ( pheval_results , sort_order ) Initialise ResultSorter Parameters: Name Type Description Default pheval_results [ PhEvalResult ] List of PhEvalResult instances to be sorted required sort_order SortOrder Sorting order to be applied required Source code in src/pheval/post_processing/post_processing.py 188 189 190 191 192 193 194 195 196 197 def __init__ ( self , pheval_results : [ PhEvalResult ], sort_order : SortOrder ): \"\"\" Initialise ResultSorter Args: pheval_results ([PhEvalResult]): List of PhEvalResult instances to be sorted sort_order (SortOrder): Sorting order to be applied \"\"\" self . pheval_results = pheval_results self . sort_order = sort_order sort_pheval_results () Sort results based on the specified sort order. Returns: Type Description [ PhEvalResult ] [PhEvalResult]: Sorted list of PhEvalResult instances. Source code in src/pheval/post_processing/post_processing.py 217 218 219 220 221 222 223 224 225 226 227 228 def sort_pheval_results ( self ) -> [ PhEvalResult ]: \"\"\" Sort results based on the specified sort order. 
Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return ( self . _sort_by_increasing_score () if self . sort_order == SortOrder . ASCENDING else self . _sort_by_decreasing_score () ) SortOrder Bases: Enum Enumeration representing sorting orders. Source code in src/pheval/post_processing/post_processing.py 176 177 178 179 180 181 182 class SortOrder ( Enum ): \"\"\"Enumeration representing sorting orders.\"\"\" ASCENDING = 1 \"\"\"Ascending sort order.\"\"\" DESCENDING = 2 \"\"\"Descending sort order.\"\"\" ASCENDING = 1 class-attribute instance-attribute Ascending sort order. DESCENDING = 2 class-attribute instance-attribute Descending sort order. calculate_end_pos ( variant_start , variant_ref ) Calculate the end position for a variant Args: variant_start (int): The start position of the variant variant_ref (str): The reference allele of the variant Returns: Name Type Description int int The end position of the variant Source code in src/pheval/post_processing/post_processing.py 13 14 15 16 17 18 19 20 21 22 def calculate_end_pos ( variant_start : int , variant_ref : str ) -> int : \"\"\"Calculate the end position for a variant Args: variant_start (int): The start position of the variant variant_ref (str): The reference allele of the variant Returns: int: The end position of the variant \"\"\" return variant_start + len ( variant_ref ) - 1 generate_pheval_result ( pheval_result , sort_order_str , output_dir , tool_result_path ) Generate PhEval variant, gene or disease TSV result based on input results. Parameters: Name Type Description Default pheval_result [ PhEvalResult ] List of PhEvalResult instances to be processed. required sort_order_str str String representation of the desired sorting order. required output_dir Path Path to the output directory. required tool_result_path Path Path to the tool-specific result file. 
required Raises: Type Description ValueError If the results are not all the same type or an error occurs during file writing. Source code in src/pheval/post_processing/post_processing.py 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 def generate_pheval_result ( pheval_result : [ PhEvalResult ], sort_order_str : str , output_dir : Path , tool_result_path : Path , ) -> None : \"\"\" Generate PhEval variant, gene or disease TSV result based on input results. Args: pheval_result ([PhEvalResult]): List of PhEvalResult instances to be processed. sort_order_str (str): String representation of the desired sorting order. output_dir (Path): Path to the output directory. tool_result_path (Path): Path to the tool-specific result file. Raises: ValueError: If the results are not all the same type or an error occurs during file writing. \"\"\" if not pheval_result : info_log . warning ( f \"No results found for { tool_result_path . 
name } \" ) return ranked_pheval_result = _create_pheval_result ( pheval_result , sort_order_str ) if all ( isinstance ( result , PhEvalGeneResult ) for result in pheval_result ): _write_pheval_gene_result ( ranked_pheval_result , output_dir , tool_result_path ) elif all ( isinstance ( result , PhEvalVariantResult ) for result in pheval_result ): _write_pheval_variant_result ( ranked_pheval_result , output_dir , tool_result_path ) elif all ( isinstance ( result , PhEvalDiseaseResult ) for result in pheval_result ): _write_pheval_disease_result ( ranked_pheval_result , output_dir , tool_result_path ) else : raise ValueError ( \"Results are not all of the same type.\" )","title":"Post processing"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalDiseaseResult","text":"Bases: PhEvalResult Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 @dataclass class PhEvalDiseaseResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for disease prioritisation Args: disease_name (str): Disease name for the result entry disease_identifier (str): Identifier for the disease result entry in the OMIM namespace score (str): Score for the disease result entry Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. 
\"\"\" disease_name : str disease_identifier : str score : float","title":"PhEvalDiseaseResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalGeneResult","text":"Bases: PhEvalResult Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 @dataclass class PhEvalGeneResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for gene prioritisation result Args: gene_symbol (Union[List[str], str]): The gene symbol(s) for the result entry gene_identifier (Union[List[str], str]): The ENSEMBL gene identifier(s) for the result entry score (float): The score for the gene result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" gene_symbol : Union [ List [ str ], str ] gene_identifier : Union [ List [ str ], str ] score : float","title":"PhEvalGeneResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalResult","text":"Base class for PhEval results. 
Source code in src/pheval/post_processing/post_processing.py 25 26 27 @dataclass class PhEvalResult : \"\"\"Base class for PhEval results.\"\"\"","title":"PhEvalResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.PhEvalVariantResult","text":"Bases: PhEvalResult Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. Source code in src/pheval/post_processing/post_processing.py 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 @dataclass class PhEvalVariantResult ( PhEvalResult ): \"\"\"Minimal data required from tool-specific output for variant prioritisation Args: chromosome (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. 
start (int): The start position of the variant end (int): The end position of the variant ref (str): The reference allele of the variant alt (str): The alternate allele of the variant score (float): The score for the variant result entry Notes: While we recommend providing the variant's chromosome in the specified format, any matching format used in Phenopacket interpretations is acceptable for result matching purposes in the analysis. \"\"\" chromosome : str start : int end : int ref : str alt : str score : float","title":"PhEvalVariantResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult","text":"Bases: PhEvalDiseaseResult PhEval disease result with corresponding rank Args: rank (int): The rank for the result entry Source code in src/pheval/post_processing/post_processing.py 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 @dataclass class RankedPhEvalDiseaseResult ( PhEvalDiseaseResult ): \"\"\"PhEval disease result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_disease_result ( pheval_disease_result : PhEvalDiseaseResult , rank : int ): \"\"\"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalDiseaseResult: The result as a RankedPhEvalDiseaseResult \"\"\" return RankedPhEvalDiseaseResult ( disease_name = pheval_disease_result . disease_name , disease_identifier = pheval_disease_result . disease_identifier , score = pheval_disease_result . 
score , rank = rank , )","title":"RankedPhEvalDiseaseResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalDiseaseResult.from_disease_result","text":"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank (int): The corresponding rank for the result entry Returns: Name Type Description RankedPhEvalDiseaseResult The result as a RankedPhEvalDiseaseResult Source code in src/pheval/post_processing/post_processing.py 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 @staticmethod def from_disease_result ( pheval_disease_result : PhEvalDiseaseResult , rank : int ): \"\"\"Return RankedPhEvalDiseaseResult from a PhEvalDiseaseResult and rank Args: pheval_disease_result (PhEvalDiseaseResult): The disease result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalDiseaseResult: The result as a RankedPhEvalDiseaseResult \"\"\" return RankedPhEvalDiseaseResult ( disease_name = pheval_disease_result . disease_name , disease_identifier = pheval_disease_result . disease_identifier , score = pheval_disease_result . 
score , rank = rank , )","title":"from_disease_result"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalGeneResult","text":"Bases: PhEvalGeneResult PhEval gene result with corresponding rank Args: rank (int): The rank for the result entry Source code in src/pheval/post_processing/post_processing.py 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 @dataclass class RankedPhEvalGeneResult ( PhEvalGeneResult ): \"\"\"PhEval gene result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_gene_result ( pheval_gene_result : PhEvalGeneResult , rank : int ): \"\"\"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalGeneResult: The result as a RankedPhEvalGeneResult \"\"\" return RankedPhEvalGeneResult ( gene_symbol = pheval_gene_result . gene_symbol , gene_identifier = pheval_gene_result . gene_identifier , score = pheval_gene_result . 
score , rank = rank , )","title":"RankedPhEvalGeneResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalGeneResult.from_gene_result","text":"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: Name Type Description RankedPhEvalGeneResult The result as a RankedPhEvalGeneResult Source code in src/pheval/post_processing/post_processing.py 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 @staticmethod def from_gene_result ( pheval_gene_result : PhEvalGeneResult , rank : int ): \"\"\"Return RankedPhEvalGeneResult from a PhEvalGeneResult and rank Args: pheval_gene_result (PhEvalGeneResult): The gene result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalGeneResult: The result as a RankedPhEvalGeneResult \"\"\" return RankedPhEvalGeneResult ( gene_symbol = pheval_gene_result . gene_symbol , gene_identifier = pheval_gene_result . gene_identifier , score = pheval_gene_result . 
score , rank = rank , )","title":"from_gene_result"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalVariantResult","text":"Bases: PhEvalVariantResult PhEval variant result with corresponding rank Args: rank (int): The rank for the result entry Source code in src/pheval/post_processing/post_processing.py 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 @dataclass class RankedPhEvalVariantResult ( PhEvalVariantResult ): \"\"\"PhEval variant result with corresponding rank Args: rank (int): The rank for the result entry \"\"\" rank : int @staticmethod def from_variant_result ( pheval_variant_result : PhEvalVariantResult , rank : int ): \"\"\"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalVariantResult: The result as a RankedPhEvalVariantResult \"\"\" return RankedPhEvalVariantResult ( chromosome = pheval_variant_result . chromosome , start = pheval_variant_result . start , end = pheval_variant_result . end , ref = pheval_variant_result . ref , alt = pheval_variant_result . alt , score = pheval_variant_result . 
score , rank = rank , )","title":"RankedPhEvalVariantResult"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.RankedPhEvalVariantResult.from_variant_result","text":"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: Name Type Description RankedPhEvalVariantResult The result as a RankedPhEvalVariantResult Source code in src/pheval/post_processing/post_processing.py 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 @staticmethod def from_variant_result ( pheval_variant_result : PhEvalVariantResult , rank : int ): \"\"\"Return RankedPhEvalVariantResult from a PhEvalVariantResult and rank Args: pheval_variant_result (PhEvalVariantResult): The variant result entry rank (int): The corresponding rank for the result entry Returns: RankedPhEvalVariantResult: The result as a RankedPhEvalVariantResult \"\"\" return RankedPhEvalVariantResult ( chromosome = pheval_variant_result . chromosome , start = pheval_variant_result . start , end = pheval_variant_result . end , ref = pheval_variant_result . ref , alt = pheval_variant_result . alt , score = pheval_variant_result . score , rank = rank , )","title":"from_variant_result"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.ResultSorter","text":"Class for sorting PhEvalResult instances based on a given sort order. 
Source code in src/pheval/post_processing/post_processing.py 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 class ResultSorter : \"\"\"Class for sorting PhEvalResult instances based on a given sort order.\"\"\" def __init__ ( self , pheval_results : [ PhEvalResult ], sort_order : SortOrder ): \"\"\" Initialise ResultSorter Args: pheval_results ([PhEvalResult]): List of PhEvalResult instances to be sorted sort_order (SortOrder): Sorting order to be applied \"\"\" self . pheval_results = pheval_results self . sort_order = sort_order def _sort_by_decreasing_score ( self ) -> [ PhEvalResult ]: \"\"\" Sort results in descending order based on the score Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return sorted ( self . pheval_results , key = operator . attrgetter ( \"score\" ), reverse = True ) def _sort_by_increasing_score ( self ) -> [ PhEvalResult ]: \"\"\" Sort results in ascending order based on the score Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return sorted ( self . pheval_results , key = operator . attrgetter ( \"score\" ), reverse = False ) def sort_pheval_results ( self ) -> [ PhEvalResult ]: \"\"\" Sort results based on the specified sort order. Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return ( self . _sort_by_increasing_score () if self . sort_order == SortOrder . ASCENDING else self . 
_sort_by_decreasing_score () )","title":"ResultSorter"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.ResultSorter.__init__","text":"Initialise ResultSorter Parameters: Name Type Description Default pheval_results [ PhEvalResult ] List of PhEvalResult instances to be sorted required sort_order SortOrder Sorting order to be applied required Source code in src/pheval/post_processing/post_processing.py 188 189 190 191 192 193 194 195 196 197 def __init__ ( self , pheval_results : [ PhEvalResult ], sort_order : SortOrder ): \"\"\" Initialise ResultSorter Args: pheval_results ([PhEvalResult]): List of PhEvalResult instances to be sorted sort_order (SortOrder): Sorting order to be applied \"\"\" self . pheval_results = pheval_results self . sort_order = sort_order","title":"__init__"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.ResultSorter.sort_pheval_results","text":"Sort results based on the specified sort order. Returns: Type Description [ PhEvalResult ] [PhEvalResult]: Sorted list of PhEvalResult instances. Source code in src/pheval/post_processing/post_processing.py 217 218 219 220 221 222 223 224 225 226 227 228 def sort_pheval_results ( self ) -> [ PhEvalResult ]: \"\"\" Sort results based on the specified sort order. Returns: [PhEvalResult]: Sorted list of PhEvalResult instances. \"\"\" return ( self . _sort_by_increasing_score () if self . sort_order == SortOrder . ASCENDING else self . _sort_by_decreasing_score () )","title":"sort_pheval_results"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.SortOrder","text":"Bases: Enum Enumeration representing sorting orders. 
Source code in src/pheval/post_processing/post_processing.py 176 177 178 179 180 181 182 class SortOrder ( Enum ): \"\"\"Enumeration representing sorting orders.\"\"\" ASCENDING = 1 \"\"\"Ascending sort order.\"\"\" DESCENDING = 2 \"\"\"Descending sort order.\"\"\"","title":"SortOrder"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.SortOrder.ASCENDING","text":"Ascending sort order.","title":"ASCENDING"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.SortOrder.DESCENDING","text":"Descending sort order.","title":"DESCENDING"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.calculate_end_pos","text":"Calculate the end position for a variant Args: variant_start (int): The start position of the variant variant_ref (str): The reference allele of the variant Returns: Name Type Description int int The end position of the variant Source code in src/pheval/post_processing/post_processing.py 13 14 15 16 17 18 19 20 21 22 def calculate_end_pos ( variant_start : int , variant_ref : str ) -> int : \"\"\"Calculate the end position for a variant Args: variant_start (int): The start position of the variant variant_ref (str): The reference allele of the variant Returns: int: The end position of the variant \"\"\" return variant_start + len ( variant_ref ) - 1","title":"calculate_end_pos"},{"location":"api/pheval/post_processing/post_processing/#src.pheval.post_processing.post_processing.generate_pheval_result","text":"Generate PhEval variant, gene or disease TSV result based on input results. Parameters: Name Type Description Default pheval_result [ PhEvalResult ] List of PhEvalResult instances to be processed. required sort_order_str str String representation of the desired sorting order. required output_dir Path Path to the output directory. required tool_result_path Path Path to the tool-specific result file. 
required Raises: Type Description ValueError If the results are not all the same type or an error occurs during file writing. Source code in src/pheval/post_processing/post_processing.py 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 def generate_pheval_result ( pheval_result : [ PhEvalResult ], sort_order_str : str , output_dir : Path , tool_result_path : Path , ) -> None : \"\"\" Generate PhEval variant, gene or disease TSV result based on input results. Args: pheval_result ([PhEvalResult]): List of PhEvalResult instances to be processed. sort_order_str (str): String representation of the desired sorting order. output_dir (Path): Path to the output directory. tool_result_path (Path): Path to the tool-specific result file. Raises: ValueError: If the results are not all the same type or an error occurs during file writing. \"\"\" if not pheval_result : info_log . warning ( f \"No results found for { tool_result_path . name } \" ) return ranked_pheval_result = _create_pheval_result ( pheval_result , sort_order_str ) if all ( isinstance ( result , PhEvalGeneResult ) for result in pheval_result ): _write_pheval_gene_result ( ranked_pheval_result , output_dir , tool_result_path ) elif all ( isinstance ( result , PhEvalVariantResult ) for result in pheval_result ): _write_pheval_variant_result ( ranked_pheval_result , output_dir , tool_result_path ) elif all ( isinstance ( result , PhEvalDiseaseResult ) for result in pheval_result ): _write_pheval_disease_result ( ranked_pheval_result , output_dir , tool_result_path ) else : raise ValueError ( \"Results are not all of the same type.\" )","title":"generate_pheval_result"},{"location":"api/pheval/prepare/create_noisy_phenopackets/","text":"HpoRandomiser Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO). 
Source code in src/pheval/prepare/create_noisy_phenopackets.py 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 class HpoRandomiser : \"\"\"Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO).\"\"\" def __init__ ( self , hpo_ontology : ProntoImplementation , scramble_factor : float ): \"\"\" Initialise the HpoRandomiser. Args: hpo_ontology (ProntoImplementation): The instance of the HPO ontology. scramble_factor (float): A factor for scrambling phenotypic features. \"\"\" self . hpo_ontology = hpo_ontology self . phenotypic_abnormalities = set ( hpo_ontology . roots ( predicates = [ \"HP:0000118\" ])) self . scramble_factor = scramble_factor def scramble_factor_proportions ( self , phenotypic_features : list [ PhenotypicFeature ]) -> int : \"\"\" Calculate the proportion of scrambled HPO terms based on the scramble factor. Args: phenotypic_features (list[PhenotypicFeature]): List of phenotypic features. Returns: int: The calculated number of phenotypic features to be scrambled. \"\"\" if len ( phenotypic_features ) == 1 : return 1 else : return int ( round ( len ( phenotypic_features ) * self . scramble_factor , 0 )) def retrieve_hpo_term ( self , hpo_id : str ) -> PhenotypicFeature : \"\"\" Retrieve an HPO term based on the provided HPO ID. Args: hpo_id (str): The HPO ID of the term to retrieve. Returns: PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term. \"\"\" rels = self . hpo_ontology . 
entity_alias_map ( hpo_id ) hpo_term = \"\" . join ( rels [( list ( rels . keys ())[ 0 ])]) return PhenotypicFeature ( type = OntologyClass ( id = hpo_id , label = hpo_term )) @staticmethod def retain_real_patient_terms ( phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. \"\"\" if len ( phenotypic_features ) > 1 : number_of_real_id = len ( phenotypic_features ) - number_of_scrambled_terms else : number_of_real_id = 1 return random . sample ( phenotypic_features , number_of_real_id ) def convert_patient_terms_to_parent ( self , phenotypic_features : List [ PhenotypicFeature ], retained_phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Convert a subset of patient HPO terms to their respective parent terms. Args: phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features. retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note: This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. 
\"\"\" remaining_hpo = [ i for i in phenotypic_features if i not in retained_phenotypic_features ] if len ( remaining_hpo ) == 0 : number_of_scrambled_terms = 0 hpo_terms_to_be_changed = list ( random . sample ( remaining_hpo , number_of_scrambled_terms )) parent_terms = [] for term in hpo_terms_to_be_changed : if self . hpo_ontology . label ( term . type . id ) . startswith ( \"obsolete\" ): obsolete_term = self . hpo_ontology . entity_metadata_map ( term . type . id ) updated_term = list ( obsolete_term . values ())[ 0 ][ 0 ] parents = self . hpo_ontology . hierarchical_parents ( updated_term ) else : parents = self . hpo_ontology . hierarchical_parents ( term . type . id ) if not parents : parent_terms . append ( term ) else : parent_terms . append ( self . retrieve_hpo_term ( random . choice ( parents ))) return parent_terms def create_random_hpo_terms ( self , number_of_scrambled_terms : int ) -> List [ PhenotypicFeature ]: \"\"\" Generate a list of random HPO terms. Args: number_of_scrambled_terms (int): The count of random HPO terms to be generated. Returns: List[PhenotypicFeature]: A list of randomly selected HPO terms. \"\"\" random_ids = list ( random . sample ( sorted ( self . phenotypic_abnormalities ), number_of_scrambled_terms ) ) return [ self . retrieve_hpo_term ( random_id ) for random_id in random_ids ] def randomise_hpo_terms ( self , phenotypic_features : List [ PhenotypicFeature ], ) -> List [ PhenotypicFeature ]: \"\"\" Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features to be randomised. Returns: List[PhenotypicFeature]: A list of randomised HPO terms. Note: This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. 
Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. \"\"\" number_of_scrambled_terms = self . scramble_factor_proportions ( phenotypic_features ) retained_patient_terms = self . retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) return ( retained_patient_terms + self . convert_patient_terms_to_parent ( phenotypic_features , retained_patient_terms , number_of_scrambled_terms ) + self . create_random_hpo_terms ( number_of_scrambled_terms ) ) __init__ ( hpo_ontology , scramble_factor ) Initialise the HpoRandomiser. Parameters: Name Type Description Default hpo_ontology ProntoImplementation The instance of the HPO ontology. required scramble_factor float A factor for scrambling phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 32 33 34 35 36 37 38 39 40 41 42 def __init__ ( self , hpo_ontology : ProntoImplementation , scramble_factor : float ): \"\"\" Initialise the HpoRandomiser. Args: hpo_ontology (ProntoImplementation): The instance of the HPO ontology. scramble_factor (float): A factor for scrambling phenotypic features. \"\"\" self . hpo_ontology = hpo_ontology self . phenotypic_abnormalities = set ( hpo_ontology . roots ( predicates = [ \"HP:0000118\" ])) self . scramble_factor = scramble_factor convert_patient_terms_to_parent ( phenotypic_features , retained_phenotypic_features , number_of_scrambled_terms ) Convert a subset of patient HPO terms to their respective parent terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of all phenotypic features. required retained_phenotypic_features List [ PhenotypicFeature ] List of retained non-scrambled phenotypic features. 
required number_of_scrambled_terms int The count of scrambled HPO terms. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. Source code in src/pheval/prepare/create_noisy_phenopackets.py 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 def convert_patient_terms_to_parent ( self , phenotypic_features : List [ PhenotypicFeature ], retained_phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Convert a subset of patient HPO terms to their respective parent terms. Args: phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features. retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note: This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. \"\"\" remaining_hpo = [ i for i in phenotypic_features if i not in retained_phenotypic_features ] if len ( remaining_hpo ) == 0 : number_of_scrambled_terms = 0 hpo_terms_to_be_changed = list ( random . 
sample ( remaining_hpo , number_of_scrambled_terms )) parent_terms = [] for term in hpo_terms_to_be_changed : if self . hpo_ontology . label ( term . type . id ) . startswith ( \"obsolete\" ): obsolete_term = self . hpo_ontology . entity_metadata_map ( term . type . id ) updated_term = list ( obsolete_term . values ())[ 0 ][ 0 ] parents = self . hpo_ontology . hierarchical_parents ( updated_term ) else : parents = self . hpo_ontology . hierarchical_parents ( term . type . id ) if not parents : parent_terms . append ( term ) else : parent_terms . append ( self . retrieve_hpo_term ( random . choice ( parents ))) return parent_terms create_random_hpo_terms ( number_of_scrambled_terms ) Generate a list of random HPO terms. Parameters: Name Type Description Default number_of_scrambled_terms int The count of random HPO terms to be generated. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of randomly selected HPO terms. Source code in src/pheval/prepare/create_noisy_phenopackets.py 135 136 137 138 139 140 141 142 143 144 145 146 147 148 def create_random_hpo_terms ( self , number_of_scrambled_terms : int ) -> List [ PhenotypicFeature ]: \"\"\" Generate a list of random HPO terms. Args: number_of_scrambled_terms (int): The count of random HPO terms to be generated. Returns: List[PhenotypicFeature]: A list of randomly selected HPO terms. \"\"\" random_ids = list ( random . sample ( sorted ( self . phenotypic_abnormalities ), number_of_scrambled_terms ) ) return [ self . retrieve_hpo_term ( random_id ) for random_id in random_ids ] randomise_hpo_terms ( phenotypic_features ) Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of phenotypic features to be randomised. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of randomised HPO terms. 
Note This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. Source code in src/pheval/prepare/create_noisy_phenopackets.py 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 def randomise_hpo_terms ( self , phenotypic_features : List [ PhenotypicFeature ], ) -> List [ PhenotypicFeature ]: \"\"\" Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features to be randomised. Returns: List[PhenotypicFeature]: A list of randomised HPO terms. Note: This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. \"\"\" number_of_scrambled_terms = self . scramble_factor_proportions ( phenotypic_features ) retained_patient_terms = self . retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) return ( retained_patient_terms + self . 
convert_patient_terms_to_parent ( phenotypic_features , retained_patient_terms , number_of_scrambled_terms ) + self . create_random_hpo_terms ( number_of_scrambled_terms ) ) retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) staticmethod Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of phenotypic features. required number_of_scrambled_terms int The count of scrambled HPO terms. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. Source code in src/pheval/prepare/create_noisy_phenopackets.py 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 @staticmethod def retain_real_patient_terms ( phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. \"\"\" if len ( phenotypic_features ) > 1 : number_of_real_id = len ( phenotypic_features ) - number_of_scrambled_terms else : number_of_real_id = 1 return random . sample ( phenotypic_features , number_of_real_id ) retrieve_hpo_term ( hpo_id ) Retrieve an HPO term based on the provided HPO ID. Parameters: Name Type Description Default hpo_id str The HPO ID of the term to retrieve. required Returns: Name Type Description PhenotypicFeature PhenotypicFeature The PhenotypicFeature object representing the retrieved HPO term. 
Source code in src/pheval/prepare/create_noisy_phenopackets.py 59 60 61 62 63 64 65 66 67 68 69 70 71 def retrieve_hpo_term ( self , hpo_id : str ) -> PhenotypicFeature : \"\"\" Retrieve an HPO term based on the provided HPO ID. Args: hpo_id (str): The HPO ID of the term to retrieve. Returns: PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term. \"\"\" rels = self . hpo_ontology . entity_alias_map ( hpo_id ) hpo_term = \"\" . join ( rels [( list ( rels . keys ())[ 0 ])]) return PhenotypicFeature ( type = OntologyClass ( id = hpo_id , label = hpo_term )) scramble_factor_proportions ( phenotypic_features ) Calculate the proportion of scrambled HPO terms based on the scramble factor. Parameters: Name Type Description Default phenotypic_features list [ PhenotypicFeature ] List of phenotypic features. required Returns: Name Type Description int int The calculated number of phenotypic features to be scrambled. Source code in src/pheval/prepare/create_noisy_phenopackets.py 44 45 46 47 48 49 50 51 52 53 54 55 56 57 def scramble_factor_proportions ( self , phenotypic_features : list [ PhenotypicFeature ]) -> int : \"\"\" Calculate the proportion of scrambled HPO terms based on the scramble factor. Args: phenotypic_features (list[PhenotypicFeature]): List of phenotypic features. Returns: int: The calculated number of phenotypic features to be scrambled. \"\"\" if len ( phenotypic_features ) == 1 : return 1 else : return int ( round ( len ( phenotypic_features ) * self . scramble_factor , 0 )) add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket ) Randomise the phenotypic profile of a Phenopacket or Family. Parameters: Name Type Description Default hpo_randomiser HpoRandomiser An instance of HpoRandomiser used for randomisation. required phenopacket Union [ Phenopacket , Family ] The Phenopacket or Family to be randomised. 
required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The randomised Phenopacket or Family. Source code in src/pheval/prepare/create_noisy_phenopackets.py 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 def add_noise_to_phenotypic_profile ( hpo_randomiser : HpoRandomiser , phenopacket : Union [ Phenopacket , Family ], ) -> Union [ Phenopacket , Family ]: \"\"\" Randomise the phenotypic profile of a Phenopacket or Family. Args: hpo_randomiser (HpoRandomiser): An instance of HpoRandomiser used for randomisation. phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family to be randomised. Returns: Union[Phenopacket, Family]: The randomised Phenopacket or Family. \"\"\" phenotypic_features = PhenopacketUtil ( phenopacket ) . observed_phenotypic_features () random_phenotypes = hpo_randomiser . randomise_hpo_terms ( phenotypic_features ) randomised_phenopacket = PhenopacketRebuilder ( phenopacket ) . add_randomised_hpo ( random_phenotypes ) return randomised_phenopacket create_scrambled_phenopacket ( output_dir , phenopacket_path , scramble_factor ) Create a scrambled version of a Phenopacket. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopacket. required phenopacket_path Path The path to the original Phenopacket file. required scramble_factor float A factor determining the level of scrambling for phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 def create_scrambled_phenopacket ( output_dir : Path , phenopacket_path : Path , scramble_factor : float ) -> None : \"\"\" Create a scrambled version of a Phenopacket. Args: output_dir (Path): The directory to store the output scrambled Phenopacket. phenopacket_path (Path): The path to the original Phenopacket file. 
scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" ontology = load_ontology () hpo_randomiser = HpoRandomiser ( ontology , scramble_factor ) phenopacket = phenopacket_reader ( phenopacket_path ) created_noisy_phenopacket = add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket , ) write_phenopacket ( created_noisy_phenopacket , output_dir . joinpath ( phenopacket_path . name ), ) create_scrambled_phenopackets ( output_dir , phenopacket_dir , scramble_factor ) Create scrambled versions of Phenopackets within a directory. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopackets. required phenopacket_dir Path The directory containing the original Phenopacket files. required scramble_factor float A factor determining the level of scrambling for phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 def create_scrambled_phenopackets ( output_dir : Path , phenopacket_dir : Path , scramble_factor : float ) -> None : \"\"\" Create scrambled versions of Phenopackets within a directory. Args: output_dir (Path): The directory to store the output scrambled Phenopackets. phenopacket_dir (Path): The directory containing the original Phenopacket files. scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" ontology = load_ontology () hpo_randomiser = HpoRandomiser ( ontology , scramble_factor ) phenopacket_files = files_with_suffix ( phenopacket_dir , \".json\" ) for phenopacket_path in phenopacket_files : phenopacket = phenopacket_reader ( phenopacket_path ) created_noisy_phenopacket = add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket ) write_phenopacket ( created_noisy_phenopacket , output_dir . joinpath ( phenopacket_path . 
name , ), ) load_ontology () Load the Human Phenotype Ontology (HPO). Returns: Name Type Description ProntoImplementation An instance of ProntoImplementation containing the loaded HPO. Source code in src/pheval/prepare/create_noisy_phenopackets.py 18 19 20 21 22 23 24 25 26 def load_ontology (): \"\"\" Load the Human Phenotype Ontology (HPO). Returns: ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO. \"\"\" resource = OntologyResource ( slug = \"hp.obo\" , local = False ) return ProntoImplementation ( resource ) scramble_phenopackets ( output_dir , phenopacket_path , phenopacket_dir , scramble_factor ) Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopackets. required phenopacket_path Path The path to a single Phenopacket file (if applicable). required phenopacket_dir Path The directory containing multiple Phenopacket files (if applicable). required scramble_factor float A factor determining the level of scrambling for phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 def scramble_phenopackets ( output_dir : Path , phenopacket_path : Path , phenopacket_dir : Path , scramble_factor : float ) -> None : \"\"\" Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets. Args: output_dir (Path): The directory to store the output scrambled Phenopackets. phenopacket_path (Path): The path to a single Phenopacket file (if applicable). phenopacket_dir (Path): The directory containing multiple Phenopacket files (if applicable). scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" output_dir . 
mkdir ( exist_ok = True ) if phenopacket_path is not None : create_scrambled_phenopacket ( output_dir , phenopacket_path , scramble_factor ) elif phenopacket_dir is not None : create_scrambled_phenopackets ( output_dir , phenopacket_dir , scramble_factor )","title":"Create noisy phenopackets"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser","text":"Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO). Source code in src/pheval/prepare/create_noisy_phenopackets.py 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 class HpoRandomiser : \"\"\"Class for randomising phenopacket phenotypic features using Human Phenotype Ontology (HPO).\"\"\" def __init__ ( self , hpo_ontology : ProntoImplementation , scramble_factor : float ): \"\"\" Initialise the HpoRandomiser. Args: hpo_ontology (ProntoImplementation): The instance of the HPO ontology. scramble_factor (float): A factor for scrambling phenotypic features. \"\"\" self . hpo_ontology = hpo_ontology self . phenotypic_abnormalities = set ( hpo_ontology . roots ( predicates = [ \"HP:0000118\" ])) self . scramble_factor = scramble_factor def scramble_factor_proportions ( self , phenotypic_features : list [ PhenotypicFeature ]) -> int : \"\"\" Calculate the proportion of scrambled HPO terms based on the scramble factor. Args: phenotypic_features (list[PhenotypicFeature]): List of phenotypic features. 
Returns: int: The calculated number of phenotypic features to be scrambled. \"\"\" if len ( phenotypic_features ) == 1 : return 1 else : return int ( round ( len ( phenotypic_features ) * self . scramble_factor , 0 )) def retrieve_hpo_term ( self , hpo_id : str ) -> PhenotypicFeature : \"\"\" Retrieve an HPO term based on the provided HPO ID. Args: hpo_id (str): The HPO ID of the term to retrieve. Returns: PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term. \"\"\" rels = self . hpo_ontology . entity_alias_map ( hpo_id ) hpo_term = \"\" . join ( rels [( list ( rels . keys ())[ 0 ])]) return PhenotypicFeature ( type = OntologyClass ( id = hpo_id , label = hpo_term )) @staticmethod def retain_real_patient_terms ( phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. \"\"\" if len ( phenotypic_features ) > 1 : number_of_real_id = len ( phenotypic_features ) - number_of_scrambled_terms else : number_of_real_id = 1 return random . sample ( phenotypic_features , number_of_real_id ) def convert_patient_terms_to_parent ( self , phenotypic_features : List [ PhenotypicFeature ], retained_phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Convert a subset of patient HPO terms to their respective parent terms. Args: phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features. retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. 
Returns: List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note: This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. \"\"\" remaining_hpo = [ i for i in phenotypic_features if i not in retained_phenotypic_features ] if len ( remaining_hpo ) == 0 : number_of_scrambled_terms = 0 hpo_terms_to_be_changed = list ( random . sample ( remaining_hpo , number_of_scrambled_terms )) parent_terms = [] for term in hpo_terms_to_be_changed : if self . hpo_ontology . label ( term . type . id ) . startswith ( \"obsolete\" ): obsolete_term = self . hpo_ontology . entity_metadata_map ( term . type . id ) updated_term = list ( obsolete_term . values ())[ 0 ][ 0 ] parents = self . hpo_ontology . hierarchical_parents ( updated_term ) else : parents = self . hpo_ontology . hierarchical_parents ( term . type . id ) if not parents : parent_terms . append ( term ) else : parent_terms . append ( self . retrieve_hpo_term ( random . choice ( parents ))) return parent_terms def create_random_hpo_terms ( self , number_of_scrambled_terms : int ) -> List [ PhenotypicFeature ]: \"\"\" Generate a list of random HPO terms. Args: number_of_scrambled_terms (int): The count of random HPO terms to be generated. Returns: List[PhenotypicFeature]: A list of randomly selected HPO terms. \"\"\" random_ids = list ( random . sample ( sorted ( self . phenotypic_abnormalities ), number_of_scrambled_terms ) ) return [ self . 
retrieve_hpo_term ( random_id ) for random_id in random_ids ] def randomise_hpo_terms ( self , phenotypic_features : List [ PhenotypicFeature ], ) -> List [ PhenotypicFeature ]: \"\"\" Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features to be randomised. Returns: List[PhenotypicFeature]: A list of randomised HPO terms. Note: This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. \"\"\" number_of_scrambled_terms = self . scramble_factor_proportions ( phenotypic_features ) retained_patient_terms = self . retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) return ( retained_patient_terms + self . convert_patient_terms_to_parent ( phenotypic_features , retained_patient_terms , number_of_scrambled_terms ) + self . create_random_hpo_terms ( number_of_scrambled_terms ) )","title":"HpoRandomiser"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.__init__","text":"Initialise the HpoRandomiser. Parameters: Name Type Description Default hpo_ontology ProntoImplementation The instance of the HPO ontology. required scramble_factor float A factor for scrambling phenotypic features. 
required Source code in src/pheval/prepare/create_noisy_phenopackets.py 32 33 34 35 36 37 38 39 40 41 42 def __init__ ( self , hpo_ontology : ProntoImplementation , scramble_factor : float ): \"\"\" Initialise the HpoRandomiser. Args: hpo_ontology (ProntoImplementation): The instance of the HPO ontology. scramble_factor (float): A factor for scrambling phenotypic features. \"\"\" self . hpo_ontology = hpo_ontology self . phenotypic_abnormalities = set ( hpo_ontology . roots ( predicates = [ \"HP:0000118\" ])) self . scramble_factor = scramble_factor","title":"__init__"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.convert_patient_terms_to_parent","text":"Convert a subset of patient HPO terms to their respective parent terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of all phenotypic features. required retained_phenotypic_features List [ PhenotypicFeature ] List of retained non-scrambled phenotypic features. required number_of_scrambled_terms int The count of scrambled HPO terms. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. 
Source code in src/pheval/prepare/create_noisy_phenopackets.py 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 def convert_patient_terms_to_parent ( self , phenotypic_features : List [ PhenotypicFeature ], retained_phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Convert a subset of patient HPO terms to their respective parent terms. Args: phenotypic_features (List[PhenotypicFeature]): List of all phenotypic features. retained_phenotypic_features (List[PhenotypicFeature]): List of retained non-scrambled phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of HPO terms converted to their parent terms. Note: This method identifies a subset of patient HPO terms that are not retained among the non-scrambled phenotypic features and converts them to their respective parent terms. It then returns a list of parent HPO terms based on the provided scrambled terms count. If no remaining HPO terms are available for conversion, no parent terms are returned. \"\"\" remaining_hpo = [ i for i in phenotypic_features if i not in retained_phenotypic_features ] if len ( remaining_hpo ) == 0 : number_of_scrambled_terms = 0 hpo_terms_to_be_changed = list ( random . sample ( remaining_hpo , number_of_scrambled_terms )) parent_terms = [] for term in hpo_terms_to_be_changed : if self . hpo_ontology . label ( term . type . id ) . startswith ( \"obsolete\" ): obsolete_term = self . hpo_ontology . entity_metadata_map ( term . type . id ) updated_term = list ( obsolete_term . values ())[ 0 ][ 0 ] parents = self . hpo_ontology . hierarchical_parents ( updated_term ) else : parents = self . hpo_ontology . hierarchical_parents ( term . type . id ) if not parents : parent_terms . append ( term ) else : parent_terms . append ( self . 
retrieve_hpo_term ( random . choice ( parents ))) return parent_terms","title":"convert_patient_terms_to_parent"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.create_random_hpo_terms","text":"Generate a list of random HPO terms. Parameters: Name Type Description Default number_of_scrambled_terms int The count of random HPO terms to be generated. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of randomly selected HPO terms. Source code in src/pheval/prepare/create_noisy_phenopackets.py 135 136 137 138 139 140 141 142 143 144 145 146 147 148 def create_random_hpo_terms ( self , number_of_scrambled_terms : int ) -> List [ PhenotypicFeature ]: \"\"\" Generate a list of random HPO terms. Args: number_of_scrambled_terms (int): The count of random HPO terms to be generated. Returns: List[PhenotypicFeature]: A list of randomly selected HPO terms. \"\"\" random_ids = list ( random . sample ( sorted ( self . phenotypic_abnormalities ), number_of_scrambled_terms ) ) return [ self . retrieve_hpo_term ( random_id ) for random_id in random_ids ]","title":"create_random_hpo_terms"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.randomise_hpo_terms","text":"Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of phenotypic features to be randomised. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of randomised HPO terms. Note This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. 
Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. Source code in src/pheval/prepare/create_noisy_phenopackets.py 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 def randomise_hpo_terms ( self , phenotypic_features : List [ PhenotypicFeature ], ) -> List [ PhenotypicFeature ]: \"\"\" Randomise the provided phenotypic features by combining retained, parent-converted, and random HPO terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features to be randomised. Returns: List[PhenotypicFeature]: A list of randomised HPO terms. Note: This method randomises the provided phenotypic features by incorporating three types of HPO terms: 1. Retained Patient Terms: Non-scrambled (real patient) HPO terms retained based on the scramble factor. 2. Converted to Parent Terms: Subset of HPO terms converted to their respective parent terms. 3. Random HPO Terms: Newly generated random HPO terms based on the scramble factor. The method determines the count of terms for each category and combines them to form a final list of randomised HPO terms to be used in the phenotypic features. \"\"\" number_of_scrambled_terms = self . scramble_factor_proportions ( phenotypic_features ) retained_patient_terms = self . retain_real_patient_terms ( phenotypic_features , number_of_scrambled_terms ) return ( retained_patient_terms + self . convert_patient_terms_to_parent ( phenotypic_features , retained_patient_terms , number_of_scrambled_terms ) + self . 
create_random_hpo_terms ( number_of_scrambled_terms ) )","title":"randomise_hpo_terms"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retain_real_patient_terms","text":"Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Parameters: Name Type Description Default phenotypic_features List [ PhenotypicFeature ] List of phenotypic features. required number_of_scrambled_terms int The count of scrambled HPO terms. required Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. Source code in src/pheval/prepare/create_noisy_phenopackets.py 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 @staticmethod def retain_real_patient_terms ( phenotypic_features : List [ PhenotypicFeature ], number_of_scrambled_terms : int , ) -> List [ PhenotypicFeature ]: \"\"\" Return a list of real patient HPO terms, retaining a specific number of non-scrambled terms. Args: phenotypic_features (List[PhenotypicFeature]): List of phenotypic features. number_of_scrambled_terms (int): The count of scrambled HPO terms. Returns: List[PhenotypicFeature]: A list of non-scrambled (real patient) HPO terms. \"\"\" if len ( phenotypic_features ) > 1 : number_of_real_id = len ( phenotypic_features ) - number_of_scrambled_terms else : number_of_real_id = 1 return random . sample ( phenotypic_features , number_of_real_id )","title":"retain_real_patient_terms"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.retrieve_hpo_term","text":"Retrieve an HPO term based on the provided HPO ID. Parameters: Name Type Description Default hpo_id str The HPO ID of the term to retrieve. required Returns: Name Type Description PhenotypicFeature PhenotypicFeature The PhenotypicFeature object representing the retrieved HPO term. 
Source code in src/pheval/prepare/create_noisy_phenopackets.py 59 60 61 62 63 64 65 66 67 68 69 70 71 def retrieve_hpo_term ( self , hpo_id : str ) -> PhenotypicFeature : \"\"\" Retrieve an HPO term based on the provided HPO ID. Args: hpo_id (str): The HPO ID of the term to retrieve. Returns: PhenotypicFeature: The PhenotypicFeature object representing the retrieved HPO term. \"\"\" rels = self . hpo_ontology . entity_alias_map ( hpo_id ) hpo_term = \"\" . join ( rels [( list ( rels . keys ())[ 0 ])]) return PhenotypicFeature ( type = OntologyClass ( id = hpo_id , label = hpo_term ))","title":"retrieve_hpo_term"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.HpoRandomiser.scramble_factor_proportions","text":"Calculate the proportion of scrambled HPO terms based on the scramble factor. Parameters: Name Type Description Default phenotypic_features list [ PhenotypicFeature ] List of phenotypic features. required Returns: Name Type Description int int The calculated number of phenotypic features to be scrambled. Source code in src/pheval/prepare/create_noisy_phenopackets.py 44 45 46 47 48 49 50 51 52 53 54 55 56 57 def scramble_factor_proportions ( self , phenotypic_features : list [ PhenotypicFeature ]) -> int : \"\"\" Calculate the proportion of scrambled HPO terms based on the scramble factor. Args: phenotypic_features (list[PhenotypicFeature]): List of phenotypic features. Returns: int: The calculated number of phenotypic features to be scrambled. \"\"\" if len ( phenotypic_features ) == 1 : return 1 else : return int ( round ( len ( phenotypic_features ) * self . scramble_factor , 0 ))","title":"scramble_factor_proportions"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.add_noise_to_phenotypic_profile","text":"Randomise the phenotypic profile of a Phenopacket or Family. 
Parameters: Name Type Description Default hpo_randomiser HpoRandomiser An instance of HpoRandomiser used for randomisation. required phenopacket Union [ Phenopacket , Family ] The Phenopacket or Family to be randomised. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The randomised Phenopacket or Family. Source code in src/pheval/prepare/create_noisy_phenopackets.py 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 def add_noise_to_phenotypic_profile ( hpo_randomiser : HpoRandomiser , phenopacket : Union [ Phenopacket , Family ], ) -> Union [ Phenopacket , Family ]: \"\"\" Randomise the phenotypic profile of a Phenopacket or Family. Args: hpo_randomiser (HpoRandomiser): An instance of HpoRandomiser used for randomisation. phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family to be randomised. Returns: Union[Phenopacket, Family]: The randomised Phenopacket or Family. \"\"\" phenotypic_features = PhenopacketUtil ( phenopacket ) . observed_phenotypic_features () random_phenotypes = hpo_randomiser . randomise_hpo_terms ( phenotypic_features ) randomised_phenopacket = PhenopacketRebuilder ( phenopacket ) . add_randomised_hpo ( random_phenotypes ) return randomised_phenopacket","title":"add_noise_to_phenotypic_profile"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopacket","text":"Create a scrambled version of a Phenopacket. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopacket. required phenopacket_path Path The path to the original Phenopacket file. required scramble_factor float A factor determining the level of scrambling for phenotypic features. 
required Source code in src/pheval/prepare/create_noisy_phenopackets.py 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 def create_scrambled_phenopacket ( output_dir : Path , phenopacket_path : Path , scramble_factor : float ) -> None : \"\"\" Create a scrambled version of a Phenopacket. Args: output_dir (Path): The directory to store the output scrambled Phenopacket. phenopacket_path (Path): The path to the original Phenopacket file. scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" ontology = load_ontology () hpo_randomiser = HpoRandomiser ( ontology , scramble_factor ) phenopacket = phenopacket_reader ( phenopacket_path ) created_noisy_phenopacket = add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket , ) write_phenopacket ( created_noisy_phenopacket , output_dir . joinpath ( phenopacket_path . name ), )","title":"create_scrambled_phenopacket"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.create_scrambled_phenopackets","text":"Create scrambled versions of Phenopackets within a directory. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopackets. required phenopacket_dir Path The directory containing the original Phenopacket files. required scramble_factor float A factor determining the level of scrambling for phenotypic features. required Source code in src/pheval/prepare/create_noisy_phenopackets.py 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 def create_scrambled_phenopackets ( output_dir : Path , phenopacket_dir : Path , scramble_factor : float ) -> None : \"\"\" Create scrambled versions of Phenopackets within a directory. Args: output_dir (Path): The directory to store the output scrambled Phenopackets. phenopacket_dir (Path): The directory containing the original Phenopacket files. 
scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" ontology = load_ontology () hpo_randomiser = HpoRandomiser ( ontology , scramble_factor ) phenopacket_files = files_with_suffix ( phenopacket_dir , \".json\" ) for phenopacket_path in phenopacket_files : phenopacket = phenopacket_reader ( phenopacket_path ) created_noisy_phenopacket = add_noise_to_phenotypic_profile ( hpo_randomiser , phenopacket ) write_phenopacket ( created_noisy_phenopacket , output_dir . joinpath ( phenopacket_path . name , ), )","title":"create_scrambled_phenopackets"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.load_ontology","text":"Load the Human Phenotype Ontology (HPO). Returns: Name Type Description ProntoImplementation An instance of ProntoImplementation containing the loaded HPO. Source code in src/pheval/prepare/create_noisy_phenopackets.py 18 19 20 21 22 23 24 25 26 def load_ontology (): \"\"\" Load the Human Phenotype Ontology (HPO). Returns: ProntoImplementation: An instance of ProntoImplementation containing the loaded HPO. \"\"\" resource = OntologyResource ( slug = \"hp.obo\" , local = False ) return ProntoImplementation ( resource )","title":"load_ontology"},{"location":"api/pheval/prepare/create_noisy_phenopackets/#src.pheval.prepare.create_noisy_phenopackets.scramble_phenopackets","text":"Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets. Parameters: Name Type Description Default output_dir Path The directory to store the output scrambled Phenopackets. required phenopacket_path Path The path to a single Phenopacket file (if applicable). required phenopacket_dir Path The directory containing multiple Phenopacket files (if applicable). required scramble_factor float A factor determining the level of scrambling for phenotypic features. 
required Source code in src/pheval/prepare/create_noisy_phenopackets.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 def scramble_phenopackets ( output_dir : Path , phenopacket_path : Path , phenopacket_dir : Path , scramble_factor : float ) -> None : \"\"\" Create scrambled phenopackets from either a single phenopacket or a directory of phenopackets. Args: output_dir (Path): The directory to store the output scrambled Phenopackets. phenopacket_path (Path): The path to a single Phenopacket file (if applicable). phenopacket_dir (Path): The directory containing multiple Phenopacket files (if applicable). scramble_factor (float): A factor determining the level of scrambling for phenotypic features. \"\"\" output_dir . mkdir ( exist_ok = True ) if phenopacket_path is not None : create_scrambled_phenopacket ( output_dir , phenopacket_path , scramble_factor ) elif phenopacket_dir is not None : create_scrambled_phenopackets ( output_dir , phenopacket_dir , scramble_factor )","title":"scramble_phenopackets"},{"location":"api/pheval/prepare/create_spiked_vcf/","text":"VcfFile dataclass Represents a VCF file with its name, contents, and header information. Attributes: Name Type Description vcf_file_name str The name of the VCF file. vcf_contents List [ str ] The contents of the VCF file. vcf_header VcfHeader The parsed header information of the VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 @dataclass class VcfFile : \"\"\" Represents a VCF file with its name, contents, and header information. Attributes: vcf_file_name (str): The name of the VCF file. vcf_contents (List[str]): The contents of the VCF file. vcf_header (VcfHeader): The parsed header information of the VCF file. 
\"\"\" vcf_file_name : str = None vcf_contents : List [ str ] = None vcf_header : VcfHeader = None @staticmethod def populate_fields ( template_vcf : Path ): \"\"\" Populate the fields of the VcfFile instance using the contents of a template VCF file. Args: template_vcf (Path): The path to the template VCF file. Returns: VcfFile: An instance of VcfFile with populated fields. \"\"\" contents = read_vcf ( template_vcf ) return VcfFile ( template_vcf . name , contents , VcfHeaderParser ( contents ) . parse_vcf_header ()) populate_fields ( template_vcf ) staticmethod Populate the fields of the VcfFile instance using the contents of a template VCF file. Parameters: Name Type Description Default template_vcf Path The path to the template VCF file. required Returns: Name Type Description VcfFile An instance of VcfFile with populated fields. Source code in src/pheval/prepare/create_spiked_vcf.py 190 191 192 193 194 195 196 197 198 199 200 201 202 203 @staticmethod def populate_fields ( template_vcf : Path ): \"\"\" Populate the fields of the VcfFile instance using the contents of a template VCF file. Args: template_vcf (Path): The path to the template VCF file. Returns: VcfFile: An instance of VcfFile with populated fields. \"\"\" contents = read_vcf ( template_vcf ) return VcfFile ( template_vcf . name , contents , VcfHeaderParser ( contents ) . parse_vcf_header ()) VcfHeader dataclass Data obtained from VCF header. Parameters: Name Type Description Default sample_id str The sample identifier from the VCF header. required assembly str The assembly information obtained from the VCF header. required chr_status bool A boolean indicating whether the VCF denotes chromosomes as chr or not. required Source code in src/pheval/prepare/create_spiked_vcf.py 78 79 80 81 82 83 84 85 86 87 88 89 90 @dataclass class VcfHeader : \"\"\"Data obtained from VCF header. Args: sample_id (str): The sample identifier from the VCF header. 
assembly (str): The assembly information obtained from the VCF header. chr_status (bool): A boolean indicating whether the VCF denotes chromosomes as chr or not. \"\"\" sample_id : str assembly : str chr_status : bool VcfHeaderParser Class for parsing the header of a VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 class VcfHeaderParser : \"\"\"Class for parsing the header of a VCF file.\"\"\" def __init__ ( self , vcf_contents : list [ str ]): \"\"\" Initialise the VcfHeaderParser. Args: vcf_contents (list[str]): The contents of the VCF file as a list of strings. \"\"\" self . vcf_contents = vcf_contents def parse_assembly ( self ) -> tuple [ str , bool ]: \"\"\" Parse the genome assembly and format of vcf_records. Returns: Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). \"\"\" vcf_assembly = {} chr_status = False for line in self . vcf_contents : if line . startswith ( \"##contig=<ID\" ): tokens = line . split ( \",\" ) chromosome = re . sub ( r \"^.*?ID=\" , \"\" , [ token for token in tokens if \"ID=\" in token ][ 0 ] ) if \"chr\" in chromosome : chr_status = True chromosome = chromosome . replace ( \"chr\" , \"\" ) contig_length = re . sub ( \"[^0-9]+\" , \"\" , [ token for token in tokens if \"length=\" in token ][ 0 ], ) vcf_assembly [ chromosome ] = int ( contig_length ) vcf_assembly = { i : vcf_assembly [ i ] for i in vcf_assembly if i . isdigit ()} assembly = [ k for k , v in genome_assemblies . items () if v == vcf_assembly ][ 0 ] return assembly , chr_status def parse_sample_id ( self ) -> str : \"\"\" Parse the sample ID of the VCF. Returns: str: The sample ID extracted from the VCF header. \"\"\" for line in self . vcf_contents : if line . 
startswith ( \"#CHROM\" ): return line . split ( \" \\t \" )[ 9 ] . rstrip () def parse_vcf_header ( self ) -> VcfHeader : \"\"\" Parse the header of the VCF. Returns: VcfHeader: An instance of VcfHeader containing sample ID, assembly, and chromosome status. \"\"\" assembly , chr_status = self . parse_assembly () sample_id = self . parse_sample_id () return VcfHeader ( sample_id , assembly , chr_status ) __init__ ( vcf_contents ) Initialise the VcfHeaderParser. Parameters: Name Type Description Default vcf_contents list [ str ] The contents of the VCF file as a list of strings. required Source code in src/pheval/prepare/create_spiked_vcf.py 115 116 117 118 119 120 121 122 def __init__ ( self , vcf_contents : list [ str ]): \"\"\" Initialise the VcfHeaderParser. Args: vcf_contents (list[str]): The contents of the VCF file as a list of strings. \"\"\" self . vcf_contents = vcf_contents parse_assembly () Parse the genome assembly and format of vcf_records. Returns: Type Description tuple [ str , bool ] Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). Source code in src/pheval/prepare/create_spiked_vcf.py 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 def parse_assembly ( self ) -> tuple [ str , bool ]: \"\"\" Parse the genome assembly and format of vcf_records. Returns: Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). \"\"\" vcf_assembly = {} chr_status = False for line in self . vcf_contents : if line . startswith ( \"##contig=<ID\" ): tokens = line . split ( \",\" ) chromosome = re . sub ( r \"^.*?ID=\" , \"\" , [ token for token in tokens if \"ID=\" in token ][ 0 ] ) if \"chr\" in chromosome : chr_status = True chromosome = chromosome . replace ( \"chr\" , \"\" ) contig_length = re . 
sub ( \"[^0-9]+\" , \"\" , [ token for token in tokens if \"length=\" in token ][ 0 ], ) vcf_assembly [ chromosome ] = int ( contig_length ) vcf_assembly = { i : vcf_assembly [ i ] for i in vcf_assembly if i . isdigit ()} assembly = [ k for k , v in genome_assemblies . items () if v == vcf_assembly ][ 0 ] return assembly , chr_status parse_sample_id () Parse the sample ID of the VCF. Returns: Name Type Description str str The sample ID extracted from the VCF header. Source code in src/pheval/prepare/create_spiked_vcf.py 152 153 154 155 156 157 158 159 160 161 def parse_sample_id ( self ) -> str : \"\"\" Parse the sample ID of the VCF. Returns: str: The sample ID extracted from the VCF header. \"\"\" for line in self . vcf_contents : if line . startswith ( \"#CHROM\" ): return line . split ( \" \\t \" )[ 9 ] . rstrip () parse_vcf_header () Parse the header of the VCF. Returns: Name Type Description VcfHeader VcfHeader An instance of VcfHeader containing sample ID, assembly, and chromosome status. Source code in src/pheval/prepare/create_spiked_vcf.py 163 164 165 166 167 168 169 170 171 172 def parse_vcf_header ( self ) -> VcfHeader : \"\"\" Parse the header of the VCF. Returns: VcfHeader: An instance of VcfHeader containing sample ID, assembly, and chromosome status. \"\"\" assembly , chr_status = self . parse_assembly () sample_id = self . parse_sample_id () return VcfHeader ( sample_id , assembly , chr_status ) VcfSpiker Class for spiking proband variants into template VCF file contents. 
Source code in src/pheval/prepare/create_spiked_vcf.py 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 class VcfSpiker : \"\"\"Class for spiking proband variants into template VCF file contents.\"\"\" def __init__ ( self , vcf_contents : list [ str ], proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , ): \"\"\" Initialise the VcfSpiker. Args: vcf_contents (List[str]): Contents of the template VCF file. proband_causative_variants (List[ProbandCausativeVariant]): List of proband causative variants. vcf_header (VcfHeader): The VCF header information. \"\"\" self . vcf_contents = vcf_contents self . proband_causative_variants = proband_causative_variants self . vcf_header = vcf_header def construct_variant_entry ( self , proband_variant_data : ProbandCausativeVariant ) -> List [ str ]: \"\"\" Construct variant entries. Args: proband_variant_data (ProbandCausativeVariant): Data for the proband variant. Returns: List[str]: Constructed variant entry as a list of strings. \"\"\" genotype_codes = { \"hemizygous\" : \"0/1\" , \"homozygous\" : \"1/1\" , \"heterozygous\" : \"0/1\" , \"compound heterozygous\" : \"0/1\" , } if self . vcf_header . chr_status is True and \"chr\" not in proband_variant_data . variant . chrom : proband_variant_data . variant . chrom = \"chr\" + proband_variant_data . variant . chrom return [ proband_variant_data . variant . chrom , str ( proband_variant_data . variant . pos ), \".\" , proband_variant_data . variant . ref , ( f \"< { proband_variant_data . variant . 
alt } >\" if proband_variant_data . variant . ref == \"N\" else proband_variant_data . variant . alt ), \"100\" , \"PASS\" , proband_variant_data . info if proband_variant_data . info else \".\" , \"GT\" , genotype_codes [ proband_variant_data . genotype . lower ()] + \" \\n \" , ] def construct_vcf_records ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: Updated VCF records containing the spiked variants. \"\"\" updated_vcf_records = copy ( self . vcf_contents ) for variant in self . proband_causative_variants : variant_entry = self . construct_variant_entry ( variant ) matching_indices = [ i for i , val in enumerate ( updated_vcf_records ) if val . split ( \" \\t \" )[ 0 ] == variant_entry [ 0 ] and int ( val . split ( \" \\t \" )[ 1 ]) < int ( variant_entry [ 1 ]) ] if matching_indices : variant_entry_position = matching_indices [ - 1 ] + 1 else : info_log . warning ( f \"Could not find entry position for { variant . variant . chrom } - { variant . variant . pos } -\" f \" { variant . variant . ref } - { variant . variant . alt } in { template_vcf_name } , \" \"inserting at end of VCF contents.\" ) variant_entry_position = len ( updated_vcf_records ) updated_vcf_records . insert ( variant_entry_position , \" \\t \" . join ( variant_entry )) return updated_vcf_records def construct_header ( self , updated_vcf_records : List [ str ]) -> List [ str ]: \"\"\" Construct the header of the VCF. Args: updated_vcf_records (List[str]): Updated VCF records. Returns: List[str]: Constructed header as a list of strings. \"\"\" updated_vcf_file = [] for line in updated_vcf_records : if line . startswith ( \"#\" ): text = line . replace ( self . vcf_header . sample_id , self . proband_causative_variants [ 0 ] . proband_id , ) else : text = line updated_vcf_file . 
append ( text ) return updated_vcf_file def construct_vcf ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: The complete spiked VCF file content as a list of strings. \"\"\" return self . construct_header ( self . construct_vcf_records ( template_vcf_name )) __init__ ( vcf_contents , proband_causative_variants , vcf_header ) Initialise the VcfSpiker. Parameters: Name Type Description Default vcf_contents List [ str ] Contents of the template VCF file. required proband_causative_variants List [ ProbandCausativeVariant ] List of proband causative variants. required vcf_header VcfHeader The VCF header information. required Source code in src/pheval/prepare/create_spiked_vcf.py 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 def __init__ ( self , vcf_contents : list [ str ], proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , ): \"\"\" Initialise the VcfSpiker. Args: vcf_contents (List[str]): Contents of the template VCF file. proband_causative_variants (List[ProbandCausativeVariant]): List of proband causative variants. vcf_header (VcfHeader): The VCF header information. \"\"\" self . vcf_contents = vcf_contents self . proband_causative_variants = proband_causative_variants self . vcf_header = vcf_header construct_header ( updated_vcf_records ) Construct the header of the VCF. Parameters: Name Type Description Default updated_vcf_records List [ str ] Updated VCF records. required Returns: Type Description List [ str ] List[str]: Constructed header as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 def construct_header ( self , updated_vcf_records : List [ str ]) -> List [ str ]: \"\"\" Construct the header of the VCF. 
Args: updated_vcf_records (List[str]): Updated VCF records. Returns: List[str]: Constructed header as a list of strings. \"\"\" updated_vcf_file = [] for line in updated_vcf_records : if line . startswith ( \"#\" ): text = line . replace ( self . vcf_header . sample_id , self . proband_causative_variants [ 0 ] . proband_id , ) else : text = line updated_vcf_file . append ( text ) return updated_vcf_file construct_variant_entry ( proband_variant_data ) Construct variant entries. Parameters: Name Type Description Default proband_variant_data ProbandCausativeVariant Data for the proband variant. required Returns: Type Description List [ str ] List[str]: Constructed variant entry as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 def construct_variant_entry ( self , proband_variant_data : ProbandCausativeVariant ) -> List [ str ]: \"\"\" Construct variant entries. Args: proband_variant_data (ProbandCausativeVariant): Data for the proband variant. Returns: List[str]: Constructed variant entry as a list of strings. \"\"\" genotype_codes = { \"hemizygous\" : \"0/1\" , \"homozygous\" : \"1/1\" , \"heterozygous\" : \"0/1\" , \"compound heterozygous\" : \"0/1\" , } if self . vcf_header . chr_status is True and \"chr\" not in proband_variant_data . variant . chrom : proband_variant_data . variant . chrom = \"chr\" + proband_variant_data . variant . chrom return [ proband_variant_data . variant . chrom , str ( proband_variant_data . variant . pos ), \".\" , proband_variant_data . variant . ref , ( f \"< { proband_variant_data . variant . alt } >\" if proband_variant_data . variant . ref == \"N\" else proband_variant_data . variant . alt ), \"100\" , \"PASS\" , proband_variant_data . info if proband_variant_data . info else \".\" , \"GT\" , genotype_codes [ proband_variant_data . genotype . 
lower ()] + \" \\n \" , ] construct_vcf ( template_vcf_name ) Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Parameters: Name Type Description Default template_vcf_name str Name of the template VCF file. required Returns: Type Description List [ str ] List[str]: The complete spiked VCF file content as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 393 394 395 396 397 398 399 400 401 402 403 def construct_vcf ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: The complete spiked VCF file content as a list of strings. \"\"\" return self . construct_header ( self . construct_vcf_records ( template_vcf_name )) construct_vcf_records ( template_vcf_name ) Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Parameters: Name Type Description Default template_vcf_name str Name of the template VCF file. required Returns: Type Description List [ str ] List[str]: Updated VCF records containing the spiked variants. Source code in src/pheval/prepare/create_spiked_vcf.py 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 def construct_vcf_records ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: Updated VCF records containing the spiked variants. \"\"\" updated_vcf_records = copy ( self . vcf_contents ) for variant in self . proband_causative_variants : variant_entry = self . construct_variant_entry ( variant ) matching_indices = [ i for i , val in enumerate ( updated_vcf_records ) if val . 
split ( \" \\t \" )[ 0 ] == variant_entry [ 0 ] and int ( val . split ( \" \\t \" )[ 1 ]) < int ( variant_entry [ 1 ]) ] if matching_indices : variant_entry_position = matching_indices [ - 1 ] + 1 else : info_log . warning ( f \"Could not find entry position for { variant . variant . chrom } - { variant . variant . pos } -\" f \" { variant . variant . ref } - { variant . variant . alt } in { template_vcf_name } , \" \"inserting at end of VCF contents.\" ) variant_entry_position = len ( updated_vcf_records ) updated_vcf_records . insert ( variant_entry_position , \" \\t \" . join ( variant_entry )) return updated_vcf_records VcfWriter Class for writing VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 class VcfWriter : \"\"\"Class for writing VCF file.\"\"\" def __init__ ( self , vcf_contents : List [ str ], spiked_vcf_file_path : Path , ): \"\"\" Initialise the VcfWriter class. Args: vcf_contents (List[str]): Contents of the VCF file to be written. spiked_vcf_file_path (Path): Path to the spiked VCF file to be created. \"\"\" self . vcf_contents = vcf_contents self . spiked_vcf_file_path = spiked_vcf_file_path def write_gzip ( self ) -> None : \"\"\" Write the VCF contents to a gzipped VCF file. \"\"\" encoded_contents = [ line . encode () for line in self . vcf_contents ] with gzip . open ( self . spiked_vcf_file_path , \"wb\" ) as f : for line in encoded_contents : f . write ( line ) f . close () def write_uncompressed ( self ) -> None : \"\"\" Write the VCF contents to an uncompressed VCF file. \"\"\" with open ( self . spiked_vcf_file_path , \"w\" ) as file : file . writelines ( self . vcf_contents ) file . close () def write_vcf_file ( self ) -> None : \"\"\" Write the VCF file based on compression type. 
Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). \"\"\" self . write_gzip () if is_gzipped ( self . spiked_vcf_file_path ) else self . write_uncompressed () __init__ ( vcf_contents , spiked_vcf_file_path ) Initialise the VcfWriter class. Parameters: Name Type Description Default vcf_contents List [ str ] Contents of the VCF file to be written. required spiked_vcf_file_path Path Path to the spiked VCF file to be created. required Source code in src/pheval/prepare/create_spiked_vcf.py 409 410 411 412 413 414 415 416 417 418 419 420 421 422 def __init__ ( self , vcf_contents : List [ str ], spiked_vcf_file_path : Path , ): \"\"\" Initialise the VcfWriter class. Args: vcf_contents (List[str]): Contents of the VCF file to be written. spiked_vcf_file_path (Path): Path to the spiked VCF file to be created. \"\"\" self . vcf_contents = vcf_contents self . spiked_vcf_file_path = spiked_vcf_file_path write_gzip () Write the VCF contents to a gzipped VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 424 425 426 427 428 429 430 431 432 def write_gzip ( self ) -> None : \"\"\" Write the VCF contents to a gzipped VCF file. \"\"\" encoded_contents = [ line . encode () for line in self . vcf_contents ] with gzip . open ( self . spiked_vcf_file_path , \"wb\" ) as f : for line in encoded_contents : f . write ( line ) f . close () write_uncompressed () Write the VCF contents to an uncompressed VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 434 435 436 437 438 439 440 def write_uncompressed ( self ) -> None : \"\"\" Write the VCF contents to an uncompressed VCF file. \"\"\" with open ( self . spiked_vcf_file_path , \"w\" ) as file : file . writelines ( self . vcf_contents ) file . close () write_vcf_file () Write the VCF file based on compression type. 
Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). Source code in src/pheval/prepare/create_spiked_vcf.py 442 443 444 445 446 447 448 449 def write_vcf_file ( self ) -> None : \"\"\" Write the VCF file based on compression type. Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). \"\"\" self . write_gzip () if is_gzipped ( self . spiked_vcf_file_path ) else self . write_uncompressed () check_variant_assembly ( proband_causative_variants , vcf_header , phenopacket_path ) Check the assembly of the variant assembly against the VCF. Parameters: Name Type Description Default proband_causative_variants List [ ProbandCausativeVariant ] A list of causative variants from the proband. required vcf_header VcfHeader An instance of VcfHeader representing the VCF file's header. required phenopacket_path Path The path to the Phenopacket file. required Raises: Type Description ValueError If there are too many or incompatible genome assemblies found. IncompatibleGenomeAssemblyError If the assembly in the Phenopacket does not match the VCF assembly. Source code in src/pheval/prepare/create_spiked_vcf.py 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 def check_variant_assembly ( proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , phenopacket_path : Path , ) -> None : \"\"\" Check the assembly of the variant assembly against the VCF. Args: proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband. vcf_header (VcfHeader): An instance of VcfHeader representing the VCF file's header. phenopacket_path (Path): The path to the Phenopacket file. 
Raises: ValueError: If there are too many or incompatible genome assemblies found. IncompatibleGenomeAssemblyError: If the assembly in the Phenopacket does not match the VCF assembly. \"\"\" compatible_genome_assembly = { \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" } phenopacket_assembly = list ({ variant . assembly for variant in proband_causative_variants }) if len ( phenopacket_assembly ) > 1 : raise ValueError ( \"Too many genome assemblies!\" ) if phenopacket_assembly [ 0 ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( phenopacket_assembly , phenopacket_path ) if ( phenopacket_assembly [ 0 ] in { \"hg19\" , \"GRCh37\" } and vcf_header . assembly not in { \"hg19\" , \"GRCh37\" } ) or ( phenopacket_assembly [ 0 ] in { \"hg38\" , \"GRCh38\" } and vcf_header . assembly not in { \"hg38\" , \"GRCh38\" } ): raise IncompatibleGenomeAssemblyError ( assembly = phenopacket_assembly , phenopacket = phenopacket_path ) create_spiked_vcf ( output_dir , phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir ) Create a spiked VCF for a Phenopacket. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file. required phenopacket_path Path Path to the Phenopacket file. required hg19_template_vcf Path Path to the hg19 template VCF file (optional). required hg38_template_vcf Path Path to the hg38 template VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). required Raises: Type Description InputError If both hg19_template_vcf and hg38_template_vcf are None. 
Source code in src/pheval/prepare/create_spiked_vcf.py 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 def create_spiked_vcf ( output_dir : Path , phenopacket_path : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create a spiked VCF for a Phenopacket. Args: output_dir (Path): The directory to store the generated spiked VCF file. phenopacket_path (Path): Path to the Phenopacket file. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). Raises: InputError: If both hg19_template_vcf and hg38_template_vcf are None. \"\"\" if hg19_template_vcf is None and hg38_template_vcf is None : raise InputError ( \"Either a hg19 template vcf or hg38 template vcf must be specified\" ) hg19_vcf_info = VcfFile . populate_fields ( hg19_template_vcf ) if hg19_template_vcf else None hg38_vcf_info = VcfFile . populate_fields ( hg38_template_vcf ) if hg38_template_vcf else None spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path ) create_spiked_vcfs ( output_dir , phenopacket_dir , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir ) Create a spiked VCF for a directory of Phenopackets. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file. required phenopacket_dir Path Path to the Phenopacket directory. required hg19_template_vcf Path Path to the template hg19 VCF file (optional). required hg38_template_vcf Path Path to the template hg19 VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). 
required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). required Raises: Type Description InputError If both hg19_template_vcf and hg38_template_vcf are None. Source code in src/pheval/prepare/create_spiked_vcf.py 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 def create_spiked_vcfs ( output_dir : Path , phenopacket_dir : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create a spiked VCF for a directory of Phenopackets. Args: output_dir (Path): The directory to store the generated spiked VCF file. phenopacket_dir (Path): Path to the Phenopacket directory. hg19_template_vcf (Path): Path to the template hg19 VCF file (optional). hg38_template_vcf (Path): Path to the template hg19 VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). Raises: InputError: If both hg19_template_vcf and hg38_template_vcf are None. \"\"\" if ( hg19_template_vcf is None and hg38_template_vcf is None and hg19_vcf_dir is None and hg38_vcf_dir is None ): raise InputError ( \"Need to specify a VCF!\" ) hg19_vcf_info = VcfFile . populate_fields ( hg19_template_vcf ) if hg19_template_vcf else None hg38_vcf_info = VcfFile . populate_fields ( hg38_template_vcf ) if hg38_template_vcf else None for phenopacket_path in files_with_suffix ( phenopacket_dir , \".json\" ): spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path ) generate_spiked_vcf_file ( output_dir , phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) Write spiked VCF contents to a new file. Parameters: Name Type Description Default output_dir Path Path to the directory to store the generated file. 
required phenopacket Union [ Phenopacket , Family ] Phenopacket or Family containing causative variants. required phenopacket_path Path Path to the Phenopacket file. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: File: The generated File object representing the newly created spiked VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 def generate_spiked_vcf_file ( output_dir : Path , phenopacket : Union [ Phenopacket , Family ], phenopacket_path : Path , hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> File : \"\"\" Write spiked VCF contents to a new file. Args: output_dir (Path): Path to the directory to store the generated file. phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants. phenopacket_path (Path): Path to the Phenopacket file. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: File: The generated File object representing the newly created spiked VCF file. \"\"\" output_dir . mkdir ( exist_ok = True ) info_log . info ( f \" Created a directory { output_dir } \" ) vcf_assembly , spiked_vcf = spike_vcf_contents ( phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) spiked_vcf_path = output_dir . joinpath ( phenopacket_path . name . replace ( \".json\" , \".vcf.gz\" )) VcfWriter ( spiked_vcf , spiked_vcf_path ) . 
write_vcf_file () return File ( uri = urllib . parse . unquote ( spiked_vcf_path . as_uri ()), file_attributes = { \"fileFormat\" : \"vcf\" , \"genomeAssembly\" : vcf_assembly }, ) read_vcf ( vcf_file ) Read the contents of a VCF file into memory, handling both uncompressed and gzipped files. Parameters: Name Type Description Default vcf_file Path The path to the VCF file to be read. required Returns: Type Description List [ str ] List[str]: A list containing the lines of the VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 def read_vcf ( vcf_file : Path ) -> List [ str ]: \"\"\" Read the contents of a VCF file into memory, handling both uncompressed and gzipped files. Args: vcf_file (Path): The path to the VCF file to be read. Returns: List[str]: A list containing the lines of the VCF file. \"\"\" open_fn = gzip . open if is_gzipped ( vcf_file ) else open vcf = open_fn ( vcf_file ) vcf_contents = ( [ line . decode () for line in vcf . readlines ()] if is_gzipped ( vcf_file ) else vcf . readlines () ) vcf . close () return vcf_contents select_vcf_template ( phenopacket_path , proband_causative_variants , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) Select the appropriate VCF template based on the assembly information of the proband causative variants. Parameters: Name Type Description Default phenopacket_path Path The path to the Phenopacket file. required proband_causative_variants List [ ProbandCausativeVariant ] A list of causative variants from the proband. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile CF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. 
required Returns: Name Type Description VcfFile VcfFile The selected VCF template file based on the assembly information of the proband causative variants. Source code in src/pheval/prepare/create_spiked_vcf.py 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 def select_vcf_template ( phenopacket_path : Path , proband_causative_variants : List [ ProbandCausativeVariant ], hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> VcfFile : \"\"\" Select the appropriate VCF template based on the assembly information of the proband causative variants. Args: phenopacket_path (Path): The path to the Phenopacket file. proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): CF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: VcfFile: The selected VCF template file based on the assembly information of the proband causative variants. \"\"\" if proband_causative_variants [ 0 ] . assembly in [ \"hg19\" , \"GRCh37\" ]: if hg19_vcf_info : return hg19_vcf_info elif hg19_vcf_dir : return VcfFile . populate_fields ( random . choice ( all_files ( hg19_vcf_dir ))) else : raise InputError ( \"Must specify hg19 template VCF!\" ) elif proband_causative_variants [ 0 ] . assembly in [ \"hg38\" , \"GRCh38\" ]: if hg38_vcf_info : return hg38_vcf_info elif hg38_vcf_dir : return VcfFile . populate_fields ( random . choice ( all_files ( hg38_vcf_dir ))) else : raise InputError ( \"Must specify hg38 template VCF!\" ) else : raise IncompatibleGenomeAssemblyError ( proband_causative_variants [ 0 ] . 
assembly , phenopacket_path ) spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path ) Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket accordingly, and write the updated Phenopacket to the specified output directory. Parameters: Name Type Description Default hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required output_dir Path Directory where the updated Phenopacket will be saved. required phenopacket_path Path Path to the original Phenopacket file. required Returns: Type Description None None Source code in src/pheval/prepare/create_spiked_vcf.py 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 def spike_and_update_phenopacket ( hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , output_dir : Path , phenopacket_path : Path , ) -> None : \"\"\" Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket accordingly, and write the updated Phenopacket to the specified output directory. Args: hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. output_dir (Path): Directory where the updated Phenopacket will be saved. phenopacket_path (Path): Path to the original Phenopacket file. 
Returns: None \"\"\" phenopacket = phenopacket_reader ( phenopacket_path ) spiked_vcf_file_message = generate_spiked_vcf_file ( output_dir , phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , ) updated_phenopacket = PhenopacketRebuilder ( phenopacket ) . add_spiked_vcf_path ( spiked_vcf_file_message ) write_phenopacket ( updated_phenopacket , phenopacket_path ) spike_vcf_contents ( phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) Spike VCF records with variants obtained from a Phenopacket or Family. Parameters: Name Type Description Default phenopacket Union [ Phenopacket , Family ] Phenopacket or Family containing causative variants. required phenopacket_path Path Path to the Phenopacket file. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: Type Description tuple [ str , List [ str ]] A tuple containing: assembly (str): The genome assembly information extracted from VCF header. modified_vcf_contents (List[str]): Modified VCF records with spiked variants. Source code in src/pheval/prepare/create_spiked_vcf.py 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 def spike_vcf_contents ( phenopacket : Union [ Phenopacket , Family ], phenopacket_path : Path , hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> tuple [ str , List [ str ]]: \"\"\" Spike VCF records with variants obtained from a Phenopacket or Family. Args: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants. 
phenopacket_path (Path): Path to the Phenopacket file. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: A tuple containing: assembly (str): The genome assembly information extracted from VCF header. modified_vcf_contents (List[str]): Modified VCF records with spiked variants. \"\"\" phenopacket_causative_variants = PhenopacketUtil ( phenopacket ) . causative_variants () chosen_template_vcf = select_vcf_template ( phenopacket_path , phenopacket_causative_variants , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , ) check_variant_assembly ( phenopacket_causative_variants , chosen_template_vcf . vcf_header , phenopacket_path ) return ( chosen_template_vcf . vcf_header . assembly , VcfSpiker ( chosen_template_vcf . vcf_contents , phenopacket_causative_variants , chosen_template_vcf . vcf_header , ) . construct_vcf ( chosen_template_vcf . vcf_file_name ), ) spike_vcfs ( output_dir , phenopacket_path , phenopacket_dir , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir ) Create spiked VCF from either a Phenopacket or a Phenopacket directory. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file(s). required phenopacket_path Path Path to a single Phenopacket file (optional). required phenopacket_dir Path Path to a directory containing Phenopacket files (optional). required hg19_template_vcf Path Path to the hg19 template VCF file (optional). required hg38_template_vcf Path Path to the hg38 template VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). 
required Source code in src/pheval/prepare/create_spiked_vcf.py 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 def spike_vcfs ( output_dir : Path , phenopacket_path : Path , phenopacket_dir : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create spiked VCF from either a Phenopacket or a Phenopacket directory. Args: output_dir (Path): The directory to store the generated spiked VCF file(s). phenopacket_path (Path): Path to a single Phenopacket file (optional). phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional). hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). \"\"\" if phenopacket_path is not None : create_spiked_vcf ( output_dir , phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , ) elif phenopacket_dir is not None : create_spiked_vcfs ( output_dir , phenopacket_dir , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , )","title":"Create spiked vcf"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfFile","text":"Represents a VCF file with its name, contents, and header information. Attributes: Name Type Description vcf_file_name str The name of the VCF file. vcf_contents List [ str ] The contents of the VCF file. vcf_header VcfHeader The parsed header information of the VCF file. 
Source code in src/pheval/prepare/create_spiked_vcf.py 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 @dataclass class VcfFile : \"\"\" Represents a VCF file with its name, contents, and header information. Attributes: vcf_file_name (str): The name of the VCF file. vcf_contents (List[str]): The contents of the VCF file. vcf_header (VcfHeader): The parsed header information of the VCF file. \"\"\" vcf_file_name : str = None vcf_contents : List [ str ] = None vcf_header : VcfHeader = None @staticmethod def populate_fields ( template_vcf : Path ): \"\"\" Populate the fields of the VcfFile instance using the contents of a template VCF file. Args: template_vcf (Path): The path to the template VCF file. Returns: VcfFile: An instance of VcfFile with populated fields. \"\"\" contents = read_vcf ( template_vcf ) return VcfFile ( template_vcf . name , contents , VcfHeaderParser ( contents ) . parse_vcf_header ())","title":"VcfFile"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfFile.populate_fields","text":"Populate the fields of the VcfFile instance using the contents of a template VCF file. Parameters: Name Type Description Default template_vcf Path The path to the template VCF file. required Returns: Name Type Description VcfFile An instance of VcfFile with populated fields. Source code in src/pheval/prepare/create_spiked_vcf.py 190 191 192 193 194 195 196 197 198 199 200 201 202 203 @staticmethod def populate_fields ( template_vcf : Path ): \"\"\" Populate the fields of the VcfFile instance using the contents of a template VCF file. Args: template_vcf (Path): The path to the template VCF file. Returns: VcfFile: An instance of VcfFile with populated fields. \"\"\" contents = read_vcf ( template_vcf ) return VcfFile ( template_vcf . name , contents , VcfHeaderParser ( contents ) . 
parse_vcf_header ())","title":"populate_fields"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeader","text":"Data obtained from VCF header. Parameters: Name Type Description Default sample_id str The sample identifier from the VCF header. required assembly str The assembly information obtained from the VCF header. required chr_status bool A boolean indicating whether the VCF denotes chromosomes as chr or not. required Source code in src/pheval/prepare/create_spiked_vcf.py 78 79 80 81 82 83 84 85 86 87 88 89 90 @dataclass class VcfHeader : \"\"\"Data obtained from VCF header. Args: sample_id (str): The sample identifier from the VCF header. assembly (str): The assembly information obtained from the VCF header. chr_status (bool): A boolean indicating whether the VCF denotes chromosomes as chr or not. \"\"\" sample_id : str assembly : str chr_status : bool","title":"VcfHeader"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser","text":"Class for parsing the header of a VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 class VcfHeaderParser : \"\"\"Class for parsing the header of a VCF file.\"\"\" def __init__ ( self , vcf_contents : list [ str ]): \"\"\" Initialise the VcfHeaderParser. Args: vcf_contents (list[str]): The contents of the VCF file as a list of strings. \"\"\" self . vcf_contents = vcf_contents def parse_assembly ( self ) -> tuple [ str , bool ]: \"\"\" Parse the genome assembly and format of vcf_records. Returns: Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). \"\"\" vcf_assembly = {} chr_status = False for line in self . vcf_contents : if line . 
startswith ( \"##contig=<ID\" ): tokens = line . split ( \",\" ) chromosome = re . sub ( r \"^.*?ID=\" , \"\" , [ token for token in tokens if \"ID=\" in token ][ 0 ] ) if \"chr\" in chromosome : chr_status = True chromosome = chromosome . replace ( \"chr\" , \"\" ) contig_length = re . sub ( \"[^0-9]+\" , \"\" , [ token for token in tokens if \"length=\" in token ][ 0 ], ) vcf_assembly [ chromosome ] = int ( contig_length ) vcf_assembly = { i : vcf_assembly [ i ] for i in vcf_assembly if i . isdigit ()} assembly = [ k for k , v in genome_assemblies . items () if v == vcf_assembly ][ 0 ] return assembly , chr_status def parse_sample_id ( self ) -> str : \"\"\" Parse the sample ID of the VCF. Returns: str: The sample ID extracted from the VCF header. \"\"\" for line in self . vcf_contents : if line . startswith ( \"#CHROM\" ): return line . split ( \" \\t \" )[ 9 ] . rstrip () def parse_vcf_header ( self ) -> VcfHeader : \"\"\" Parse the header of the VCF. Returns: VcfHeader: An instance of VcfHeader containing sample ID, assembly, and chromosome status. \"\"\" assembly , chr_status = self . parse_assembly () sample_id = self . parse_sample_id () return VcfHeader ( sample_id , assembly , chr_status )","title":"VcfHeaderParser"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.__init__","text":"Initialise the VcfHeaderParser. Parameters: Name Type Description Default vcf_contents list [ str ] The contents of the VCF file as a list of strings. required Source code in src/pheval/prepare/create_spiked_vcf.py 115 116 117 118 119 120 121 122 def __init__ ( self , vcf_contents : list [ str ]): \"\"\" Initialise the VcfHeaderParser. Args: vcf_contents (list[str]): The contents of the VCF file as a list of strings. \"\"\" self . 
vcf_contents = vcf_contents","title":"__init__"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_assembly","text":"Parse the genome assembly and format of vcf_records. Returns: Type Description tuple [ str , bool ] Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). Source code in src/pheval/prepare/create_spiked_vcf.py 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 def parse_assembly ( self ) -> tuple [ str , bool ]: \"\"\" Parse the genome assembly and format of vcf_records. Returns: Tuple[str, bool]: A tuple containing the assembly and chromosome status (True/False). \"\"\" vcf_assembly = {} chr_status = False for line in self . vcf_contents : if line . startswith ( \"##contig=<ID\" ): tokens = line . split ( \",\" ) chromosome = re . sub ( r \"^.*?ID=\" , \"\" , [ token for token in tokens if \"ID=\" in token ][ 0 ] ) if \"chr\" in chromosome : chr_status = True chromosome = chromosome . replace ( \"chr\" , \"\" ) contig_length = re . sub ( \"[^0-9]+\" , \"\" , [ token for token in tokens if \"length=\" in token ][ 0 ], ) vcf_assembly [ chromosome ] = int ( contig_length ) vcf_assembly = { i : vcf_assembly [ i ] for i in vcf_assembly if i . isdigit ()} assembly = [ k for k , v in genome_assemblies . items () if v == vcf_assembly ][ 0 ] return assembly , chr_status","title":"parse_assembly"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_sample_id","text":"Parse the sample ID of the VCF. Returns: Name Type Description str str The sample ID extracted from the VCF header. Source code in src/pheval/prepare/create_spiked_vcf.py 152 153 154 155 156 157 158 159 160 161 def parse_sample_id ( self ) -> str : \"\"\" Parse the sample ID of the VCF. Returns: str: The sample ID extracted from the VCF header. \"\"\" for line in self . vcf_contents : if line . 
startswith ( \"#CHROM\" ): return line . split ( \" \\t \" )[ 9 ] . rstrip ()","title":"parse_sample_id"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfHeaderParser.parse_vcf_header","text":"Parse the header of the VCF. Returns: Name Type Description VcfHeader VcfHeader An instance of VcfHeader containing sample ID, assembly, and chromosome status. Source code in src/pheval/prepare/create_spiked_vcf.py 163 164 165 166 167 168 169 170 171 172 def parse_vcf_header ( self ) -> VcfHeader : \"\"\" Parse the header of the VCF. Returns: VcfHeader: An instance of VcfHeader containing sample ID, assembly, and chromosome status. \"\"\" assembly , chr_status = self . parse_assembly () sample_id = self . parse_sample_id () return VcfHeader ( sample_id , assembly , chr_status )","title":"parse_vcf_header"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker","text":"Class for spiking proband variants into template VCF file contents. Source code in src/pheval/prepare/create_spiked_vcf.py 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 class VcfSpiker : \"\"\"Class for spiking proband variants into template VCF file contents.\"\"\" def __init__ ( self , vcf_contents : list [ str ], proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , ): \"\"\" Initialise the VcfSpiker. Args: vcf_contents (List[str]): Contents of the template VCF file. proband_causative_variants (List[ProbandCausativeVariant]): List of proband causative variants. 
vcf_header (VcfHeader): The VCF header information. \"\"\" self . vcf_contents = vcf_contents self . proband_causative_variants = proband_causative_variants self . vcf_header = vcf_header def construct_variant_entry ( self , proband_variant_data : ProbandCausativeVariant ) -> List [ str ]: \"\"\" Construct variant entries. Args: proband_variant_data (ProbandCausativeVariant): Data for the proband variant. Returns: List[str]: Constructed variant entry as a list of strings. \"\"\" genotype_codes = { \"hemizygous\" : \"0/1\" , \"homozygous\" : \"1/1\" , \"heterozygous\" : \"0/1\" , \"compound heterozygous\" : \"0/1\" , } if self . vcf_header . chr_status is True and \"chr\" not in proband_variant_data . variant . chrom : proband_variant_data . variant . chrom = \"chr\" + proband_variant_data . variant . chrom return [ proband_variant_data . variant . chrom , str ( proband_variant_data . variant . pos ), \".\" , proband_variant_data . variant . ref , ( f \"< { proband_variant_data . variant . alt } >\" if proband_variant_data . variant . ref == \"N\" else proband_variant_data . variant . alt ), \"100\" , \"PASS\" , proband_variant_data . info if proband_variant_data . info else \".\" , \"GT\" , genotype_codes [ proband_variant_data . genotype . lower ()] + \" \\n \" , ] def construct_vcf_records ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: Updated VCF records containing the spiked variants. \"\"\" updated_vcf_records = copy ( self . vcf_contents ) for variant in self . proband_causative_variants : variant_entry = self . construct_variant_entry ( variant ) matching_indices = [ i for i , val in enumerate ( updated_vcf_records ) if val . split ( \" \\t \" )[ 0 ] == variant_entry [ 0 ] and int ( val . 
split ( \" \\t \" )[ 1 ]) < int ( variant_entry [ 1 ]) ] if matching_indices : variant_entry_position = matching_indices [ - 1 ] + 1 else : info_log . warning ( f \"Could not find entry position for { variant . variant . chrom } - { variant . variant . pos } -\" f \" { variant . variant . ref } - { variant . variant . alt } in { template_vcf_name } , \" \"inserting at end of VCF contents.\" ) variant_entry_position = len ( updated_vcf_records ) updated_vcf_records . insert ( variant_entry_position , \" \\t \" . join ( variant_entry )) return updated_vcf_records def construct_header ( self , updated_vcf_records : List [ str ]) -> List [ str ]: \"\"\" Construct the header of the VCF. Args: updated_vcf_records (List[str]): Updated VCF records. Returns: List[str]: Constructed header as a list of strings. \"\"\" updated_vcf_file = [] for line in updated_vcf_records : if line . startswith ( \"#\" ): text = line . replace ( self . vcf_header . sample_id , self . proband_causative_variants [ 0 ] . proband_id , ) else : text = line updated_vcf_file . append ( text ) return updated_vcf_file def construct_vcf ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: The complete spiked VCF file content as a list of strings. \"\"\" return self . construct_header ( self . construct_vcf_records ( template_vcf_name ))","title":"VcfSpiker"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.__init__","text":"Initialise the VcfSpiker. Parameters: Name Type Description Default vcf_contents List [ str ] Contents of the template VCF file. required proband_causative_variants List [ ProbandCausativeVariant ] List of proband causative variants. required vcf_header VcfHeader The VCF header information. 
required Source code in src/pheval/prepare/create_spiked_vcf.py 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 def __init__ ( self , vcf_contents : list [ str ], proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , ): \"\"\" Initialise the VcfSpiker. Args: vcf_contents (List[str]): Contents of the template VCF file. proband_causative_variants (List[ProbandCausativeVariant]): List of proband causative variants. vcf_header (VcfHeader): The VCF header information. \"\"\" self . vcf_contents = vcf_contents self . proband_causative_variants = proband_causative_variants self . vcf_header = vcf_header","title":"__init__"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_header","text":"Construct the header of the VCF. Parameters: Name Type Description Default updated_vcf_records List [ str ] Updated VCF records. required Returns: Type Description List [ str ] List[str]: Constructed header as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 def construct_header ( self , updated_vcf_records : List [ str ]) -> List [ str ]: \"\"\" Construct the header of the VCF. Args: updated_vcf_records (List[str]): Updated VCF records. Returns: List[str]: Constructed header as a list of strings. \"\"\" updated_vcf_file = [] for line in updated_vcf_records : if line . startswith ( \"#\" ): text = line . replace ( self . vcf_header . sample_id , self . proband_causative_variants [ 0 ] . proband_id , ) else : text = line updated_vcf_file . append ( text ) return updated_vcf_file","title":"construct_header"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_variant_entry","text":"Construct variant entries. 
Parameters: Name Type Description Default proband_variant_data ProbandCausativeVariant Data for the proband variant. required Returns: Type Description List [ str ] List[str]: Constructed variant entry as a list of strings. Source code in src/pheval/prepare/create_spiked_vcf.py 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 def construct_variant_entry ( self , proband_variant_data : ProbandCausativeVariant ) -> List [ str ]: \"\"\" Construct variant entries. Args: proband_variant_data (ProbandCausativeVariant): Data for the proband variant. Returns: List[str]: Constructed variant entry as a list of strings. \"\"\" genotype_codes = { \"hemizygous\" : \"0/1\" , \"homozygous\" : \"1/1\" , \"heterozygous\" : \"0/1\" , \"compound heterozygous\" : \"0/1\" , } if self . vcf_header . chr_status is True and \"chr\" not in proband_variant_data . variant . chrom : proband_variant_data . variant . chrom = \"chr\" + proband_variant_data . variant . chrom return [ proband_variant_data . variant . chrom , str ( proband_variant_data . variant . pos ), \".\" , proband_variant_data . variant . ref , ( f \"< { proband_variant_data . variant . alt } >\" if proband_variant_data . variant . ref == \"N\" else proband_variant_data . variant . alt ), \"100\" , \"PASS\" , proband_variant_data . info if proband_variant_data . info else \".\" , \"GT\" , genotype_codes [ proband_variant_data . genotype . lower ()] + \" \\n \" , ]","title":"construct_variant_entry"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf","text":"Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Parameters: Name Type Description Default template_vcf_name str Name of the template VCF file. required Returns: Type Description List [ str ] List[str]: The complete spiked VCF file content as a list of strings. 
Source code in src/pheval/prepare/create_spiked_vcf.py 393 394 395 396 397 398 399 400 401 402 403 def construct_vcf ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct the entire spiked VCF file by incorporating the spiked variants into the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: The complete spiked VCF file content as a list of strings. \"\"\" return self . construct_header ( self . construct_vcf_records ( template_vcf_name ))","title":"construct_vcf"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfSpiker.construct_vcf_records","text":"Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Parameters: Name Type Description Default template_vcf_name str Name of the template VCF file. required Returns: Type Description List [ str ] List[str]: Updated VCF records containing the spiked variants. Source code in src/pheval/prepare/create_spiked_vcf.py 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 def construct_vcf_records ( self , template_vcf_name : str ) -> List [ str ]: \"\"\" Construct updated VCF records by inserting spiked variants into the correct positions within the VCF. Args: template_vcf_name (str): Name of the template VCF file. Returns: List[str]: Updated VCF records containing the spiked variants. \"\"\" updated_vcf_records = copy ( self . vcf_contents ) for variant in self . proband_causative_variants : variant_entry = self . construct_variant_entry ( variant ) matching_indices = [ i for i , val in enumerate ( updated_vcf_records ) if val . split ( \" \\t \" )[ 0 ] == variant_entry [ 0 ] and int ( val . split ( \" \\t \" )[ 1 ]) < int ( variant_entry [ 1 ]) ] if matching_indices : variant_entry_position = matching_indices [ - 1 ] + 1 else : info_log . warning ( f \"Could not find entry position for { variant . variant . 
chrom } - { variant . variant . pos } -\" f \" { variant . variant . ref } - { variant . variant . alt } in { template_vcf_name } , \" \"inserting at end of VCF contents.\" ) variant_entry_position = len ( updated_vcf_records ) updated_vcf_records . insert ( variant_entry_position , \" \\t \" . join ( variant_entry )) return updated_vcf_records","title":"construct_vcf_records"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter","text":"Class for writing VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 class VcfWriter : \"\"\"Class for writing VCF file.\"\"\" def __init__ ( self , vcf_contents : List [ str ], spiked_vcf_file_path : Path , ): \"\"\" Initialise the VcfWriter class. Args: vcf_contents (List[str]): Contents of the VCF file to be written. spiked_vcf_file_path (Path): Path to the spiked VCF file to be created. \"\"\" self . vcf_contents = vcf_contents self . spiked_vcf_file_path = spiked_vcf_file_path def write_gzip ( self ) -> None : \"\"\" Write the VCF contents to a gzipped VCF file. \"\"\" encoded_contents = [ line . encode () for line in self . vcf_contents ] with gzip . open ( self . spiked_vcf_file_path , \"wb\" ) as f : for line in encoded_contents : f . write ( line ) f . close () def write_uncompressed ( self ) -> None : \"\"\" Write the VCF contents to an uncompressed VCF file. \"\"\" with open ( self . spiked_vcf_file_path , \"w\" ) as file : file . writelines ( self . vcf_contents ) file . close () def write_vcf_file ( self ) -> None : \"\"\" Write the VCF file based on compression type. Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). \"\"\" self . 
write_gzip () if is_gzipped ( self . spiked_vcf_file_path ) else self . write_uncompressed ()","title":"VcfWriter"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter.__init__","text":"Initialise the VcfWriter class. Parameters: Name Type Description Default vcf_contents List [ str ] Contents of the VCF file to be written. required spiked_vcf_file_path Path Path to the spiked VCF file to be created. required Source code in src/pheval/prepare/create_spiked_vcf.py 409 410 411 412 413 414 415 416 417 418 419 420 421 422 def __init__ ( self , vcf_contents : List [ str ], spiked_vcf_file_path : Path , ): \"\"\" Initialise the VcfWriter class. Args: vcf_contents (List[str]): Contents of the VCF file to be written. spiked_vcf_file_path (Path): Path to the spiked VCF file to be created. \"\"\" self . vcf_contents = vcf_contents self . spiked_vcf_file_path = spiked_vcf_file_path","title":"__init__"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_gzip","text":"Write the VCF contents to a gzipped VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 424 425 426 427 428 429 430 431 432 def write_gzip ( self ) -> None : \"\"\" Write the VCF contents to a gzipped VCF file. \"\"\" encoded_contents = [ line . encode () for line in self . vcf_contents ] with gzip . open ( self . spiked_vcf_file_path , \"wb\" ) as f : for line in encoded_contents : f . write ( line ) f . close ()","title":"write_gzip"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_uncompressed","text":"Write the VCF contents to an uncompressed VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 434 435 436 437 438 439 440 def write_uncompressed ( self ) -> None : \"\"\" Write the VCF contents to an uncompressed VCF file. \"\"\" with open ( self . spiked_vcf_file_path , \"w\" ) as file : file . writelines ( self . 
vcf_contents ) file . close ()","title":"write_uncompressed"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.VcfWriter.write_vcf_file","text":"Write the VCF file based on compression type. Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). Source code in src/pheval/prepare/create_spiked_vcf.py 442 443 444 445 446 447 448 449 def write_vcf_file ( self ) -> None : \"\"\" Write the VCF file based on compression type. Determines the file writing method based on the compression type of the spiked VCF file path. Writes the VCF contents to the corresponding file format (gzip or uncompressed). \"\"\" self . write_gzip () if is_gzipped ( self . spiked_vcf_file_path ) else self . write_uncompressed ()","title":"write_vcf_file"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.check_variant_assembly","text":"Check the assembly of the variant assembly against the VCF. Parameters: Name Type Description Default proband_causative_variants List [ ProbandCausativeVariant ] A list of causative variants from the proband. required vcf_header VcfHeader An instance of VcfHeader representing the VCF file's header. required phenopacket_path Path The path to the Phenopacket file. required Raises: Type Description ValueError If there are too many or incompatible genome assemblies found. IncompatibleGenomeAssemblyError If the assembly in the Phenopacket does not match the VCF assembly. Source code in src/pheval/prepare/create_spiked_vcf.py 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 def check_variant_assembly ( proband_causative_variants : list [ ProbandCausativeVariant ], vcf_header : VcfHeader , phenopacket_path : Path , ) -> None : \"\"\" Check the assembly of the variant assembly against the VCF. 
Args: proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband. vcf_header (VcfHeader): An instance of VcfHeader representing the VCF file's header. phenopacket_path (Path): The path to the Phenopacket file. Raises: ValueError: If there are too many or incompatible genome assemblies found. IncompatibleGenomeAssemblyError: If the assembly in the Phenopacket does not match the VCF assembly. \"\"\" compatible_genome_assembly = { \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" } phenopacket_assembly = list ({ variant . assembly for variant in proband_causative_variants }) if len ( phenopacket_assembly ) > 1 : raise ValueError ( \"Too many genome assemblies!\" ) if phenopacket_assembly [ 0 ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( phenopacket_assembly , phenopacket_path ) if ( phenopacket_assembly [ 0 ] in { \"hg19\" , \"GRCh37\" } and vcf_header . assembly not in { \"hg19\" , \"GRCh37\" } ) or ( phenopacket_assembly [ 0 ] in { \"hg38\" , \"GRCh38\" } and vcf_header . assembly not in { \"hg38\" , \"GRCh38\" } ): raise IncompatibleGenomeAssemblyError ( assembly = phenopacket_assembly , phenopacket = phenopacket_path )","title":"check_variant_assembly"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.create_spiked_vcf","text":"Create a spiked VCF for a Phenopacket. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file. required phenopacket_path Path Path to the Phenopacket file. required hg19_template_vcf Path Path to the hg19 template VCF file (optional). required hg38_template_vcf Path Path to the hg38 template VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). 
required Raises: Type Description InputError If both hg19_template_vcf and hg38_template_vcf are None. Source code in src/pheval/prepare/create_spiked_vcf.py 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 def create_spiked_vcf ( output_dir : Path , phenopacket_path : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create a spiked VCF for a Phenopacket. Args: output_dir (Path): The directory to store the generated spiked VCF file. phenopacket_path (Path): Path to the Phenopacket file. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). Raises: InputError: If both hg19_template_vcf and hg38_template_vcf are None. \"\"\" if hg19_template_vcf is None and hg38_template_vcf is None : raise InputError ( \"Either a hg19 template vcf or hg38 template vcf must be specified\" ) hg19_vcf_info = VcfFile . populate_fields ( hg19_template_vcf ) if hg19_template_vcf else None hg38_vcf_info = VcfFile . populate_fields ( hg38_template_vcf ) if hg38_template_vcf else None spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path )","title":"create_spiked_vcf"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.create_spiked_vcfs","text":"Create a spiked VCF for a directory of Phenopackets. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file. required phenopacket_dir Path Path to the Phenopacket directory. required hg19_template_vcf Path Path to the template hg19 VCF file (optional). 
required hg38_template_vcf Path Path to the template hg19 VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). required Raises: Type Description InputError If both hg19_template_vcf and hg38_template_vcf are None. Source code in src/pheval/prepare/create_spiked_vcf.py 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 def create_spiked_vcfs ( output_dir : Path , phenopacket_dir : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create a spiked VCF for a directory of Phenopackets. Args: output_dir (Path): The directory to store the generated spiked VCF file. phenopacket_dir (Path): Path to the Phenopacket directory. hg19_template_vcf (Path): Path to the template hg19 VCF file (optional). hg38_template_vcf (Path): Path to the template hg19 VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). Raises: InputError: If both hg19_template_vcf and hg38_template_vcf are None. \"\"\" if ( hg19_template_vcf is None and hg38_template_vcf is None and hg19_vcf_dir is None and hg38_vcf_dir is None ): raise InputError ( \"Need to specify a VCF!\" ) hg19_vcf_info = VcfFile . populate_fields ( hg19_template_vcf ) if hg19_template_vcf else None hg38_vcf_info = VcfFile . 
populate_fields ( hg38_template_vcf ) if hg38_template_vcf else None for phenopacket_path in files_with_suffix ( phenopacket_dir , \".json\" ): spike_and_update_phenopacket ( hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , output_dir , phenopacket_path )","title":"create_spiked_vcfs"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.generate_spiked_vcf_file","text":"Write spiked VCF contents to a new file. Parameters: Name Type Description Default output_dir Path Path to the directory to store the generated file. required phenopacket Union [ Phenopacket , Family ] Phenopacket or Family containing causative variants. required phenopacket_path Path Path to the Phenopacket file. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: File: The generated File object representing the newly created spiked VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 def generate_spiked_vcf_file ( output_dir : Path , phenopacket : Union [ Phenopacket , Family ], phenopacket_path : Path , hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> File : \"\"\" Write spiked VCF contents to a new file. Args: output_dir (Path): Path to the directory to store the generated file. phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants. phenopacket_path (Path): Path to the Phenopacket file. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. 
hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: File: The generated File object representing the newly created spiked VCF file. \"\"\" output_dir . mkdir ( exist_ok = True ) info_log . info ( f \" Created a directory { output_dir } \" ) vcf_assembly , spiked_vcf = spike_vcf_contents ( phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir ) spiked_vcf_path = output_dir . joinpath ( phenopacket_path . name . replace ( \".json\" , \".vcf.gz\" )) VcfWriter ( spiked_vcf , spiked_vcf_path ) . write_vcf_file () return File ( uri = urllib . parse . unquote ( spiked_vcf_path . as_uri ()), file_attributes = { \"fileFormat\" : \"vcf\" , \"genomeAssembly\" : vcf_assembly }, )","title":"generate_spiked_vcf_file"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.read_vcf","text":"Read the contents of a VCF file into memory, handling both uncompressed and gzipped files. Parameters: Name Type Description Default vcf_file Path The path to the VCF file to be read. required Returns: Type Description List [ str ] List[str]: A list containing the lines of the VCF file. Source code in src/pheval/prepare/create_spiked_vcf.py 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 def read_vcf ( vcf_file : Path ) -> List [ str ]: \"\"\" Read the contents of a VCF file into memory, handling both uncompressed and gzipped files. Args: vcf_file (Path): The path to the VCF file to be read. Returns: List[str]: A list containing the lines of the VCF file. \"\"\" open_fn = gzip . open if is_gzipped ( vcf_file ) else open vcf = open_fn ( vcf_file ) vcf_contents = ( [ line . decode () for line in vcf . readlines ()] if is_gzipped ( vcf_file ) else vcf . readlines () ) vcf . 
close () return vcf_contents","title":"read_vcf"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.select_vcf_template","text":"Select the appropriate VCF template based on the assembly information of the proband causative variants. Parameters: Name Type Description Default phenopacket_path Path The path to the Phenopacket file. required proband_causative_variants List [ ProbandCausativeVariant ] A list of causative variants from the proband. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile CF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: Name Type Description VcfFile VcfFile The selected VCF template file based on the assembly information of the proband causative variants. Source code in src/pheval/prepare/create_spiked_vcf.py 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 def select_vcf_template ( phenopacket_path : Path , proband_causative_variants : List [ ProbandCausativeVariant ], hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> VcfFile : \"\"\" Select the appropriate VCF template based on the assembly information of the proband causative variants. Args: phenopacket_path (Path): The path to the Phenopacket file. proband_causative_variants (List[ProbandCausativeVariant]): A list of causative variants from the proband. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): CF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. 
Returns: VcfFile: The selected VCF template file based on the assembly information of the proband causative variants. \"\"\" if proband_causative_variants [ 0 ] . assembly in [ \"hg19\" , \"GRCh37\" ]: if hg19_vcf_info : return hg19_vcf_info elif hg19_vcf_dir : return VcfFile . populate_fields ( random . choice ( all_files ( hg19_vcf_dir ))) else : raise InputError ( \"Must specify hg19 template VCF!\" ) elif proband_causative_variants [ 0 ] . assembly in [ \"hg38\" , \"GRCh38\" ]: if hg38_vcf_info : return hg38_vcf_info elif hg38_vcf_dir : return VcfFile . populate_fields ( random . choice ( all_files ( hg38_vcf_dir ))) else : raise InputError ( \"Must specify hg38 template VCF!\" ) else : raise IncompatibleGenomeAssemblyError ( proband_causative_variants [ 0 ] . assembly , phenopacket_path )","title":"select_vcf_template"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.spike_and_update_phenopacket","text":"Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket accordingly, and write the updated Phenopacket to the specified output directory. Parameters: Name Type Description Default hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required output_dir Path Directory where the updated Phenopacket will be saved. required phenopacket_path Path Path to the original Phenopacket file. 
required Returns: Type Description None None Source code in src/pheval/prepare/create_spiked_vcf.py 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 def spike_and_update_phenopacket ( hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , output_dir : Path , phenopacket_path : Path , ) -> None : \"\"\" Spike the VCF files with genetic variants relevant to the provided Phenopacket, update the Phenopacket accordingly, and write the updated Phenopacket to the specified output directory. Args: hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. output_dir (Path): Directory where the updated Phenopacket will be saved. phenopacket_path (Path): Path to the original Phenopacket file. Returns: None \"\"\" phenopacket = phenopacket_reader ( phenopacket_path ) spiked_vcf_file_message = generate_spiked_vcf_file ( output_dir , phenopacket , phenopacket_path , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , ) updated_phenopacket = PhenopacketRebuilder ( phenopacket ) . add_spiked_vcf_path ( spiked_vcf_file_message ) write_phenopacket ( updated_phenopacket , phenopacket_path )","title":"spike_and_update_phenopacket"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.spike_vcf_contents","text":"Spike VCF records with variants obtained from a Phenopacket or Family. Parameters: Name Type Description Default phenopacket Union [ Phenopacket , Family ] Phenopacket or Family containing causative variants. required phenopacket_path Path Path to the Phenopacket file. required hg19_vcf_info VcfFile VCF file info for hg19 template vcf. required hg38_vcf_info VcfFile VCF file info for hg38 template vcf. 
required hg19_vcf_dir Path The directory containing the hg19 VCF files. required hg38_vcf_dir Path The directory containing the hg38 VCF files. required Returns: Type Description tuple [ str , List [ str ]] A tuple containing: assembly (str): The genome assembly information extracted from VCF header. modified_vcf_contents (List[str]): Modified VCF records with spiked variants. Source code in src/pheval/prepare/create_spiked_vcf.py 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 def spike_vcf_contents ( phenopacket : Union [ Phenopacket , Family ], phenopacket_path : Path , hg19_vcf_info : VcfFile , hg38_vcf_info : VcfFile , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> tuple [ str , List [ str ]]: \"\"\" Spike VCF records with variants obtained from a Phenopacket or Family. Args: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family containing causative variants. phenopacket_path (Path): Path to the Phenopacket file. hg19_vcf_info (VcfFile): VCF file info for hg19 template vcf. hg38_vcf_info (VcfFile): VCF file info for hg38 template vcf. hg19_vcf_dir (Path): The directory containing the hg19 VCF files. hg38_vcf_dir (Path): The directory containing the hg38 VCF files. Returns: A tuple containing: assembly (str): The genome assembly information extracted from VCF header. modified_vcf_contents (List[str]): Modified VCF records with spiked variants. \"\"\" phenopacket_causative_variants = PhenopacketUtil ( phenopacket ) . causative_variants () chosen_template_vcf = select_vcf_template ( phenopacket_path , phenopacket_causative_variants , hg19_vcf_info , hg38_vcf_info , hg19_vcf_dir , hg38_vcf_dir , ) check_variant_assembly ( phenopacket_causative_variants , chosen_template_vcf . vcf_header , phenopacket_path ) return ( chosen_template_vcf . vcf_header . assembly , VcfSpiker ( chosen_template_vcf . 
vcf_contents , phenopacket_causative_variants , chosen_template_vcf . vcf_header , ) . construct_vcf ( chosen_template_vcf . vcf_file_name ), )","title":"spike_vcf_contents"},{"location":"api/pheval/prepare/create_spiked_vcf/#src.pheval.prepare.create_spiked_vcf.spike_vcfs","text":"Create spiked VCF from either a Phenopacket or a Phenopacket directory. Parameters: Name Type Description Default output_dir Path The directory to store the generated spiked VCF file(s). required phenopacket_path Path Path to a single Phenopacket file (optional). required phenopacket_dir Path Path to a directory containing Phenopacket files (optional). required hg19_template_vcf Path Path to the hg19 template VCF file (optional). required hg38_template_vcf Path Path to the hg38 template VCF file (optional). required hg19_vcf_dir Path The directory containing the hg19 VCF files (optional). required hg38_vcf_dir Path The directory containing the hg38 VCF files (optional). required Source code in src/pheval/prepare/create_spiked_vcf.py 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 def spike_vcfs ( output_dir : Path , phenopacket_path : Path , phenopacket_dir : Path , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , ) -> None : \"\"\" Create spiked VCF from either a Phenopacket or a Phenopacket directory. Args: output_dir (Path): The directory to store the generated spiked VCF file(s). phenopacket_path (Path): Path to a single Phenopacket file (optional). phenopacket_dir (Path): Path to a directory containing Phenopacket files (optional). hg19_template_vcf (Path): Path to the hg19 template VCF file (optional). hg38_template_vcf (Path): Path to the hg38 template VCF file (optional). hg19_vcf_dir (Path): The directory containing the hg19 VCF files (optional). hg38_vcf_dir (Path): The directory containing the hg38 VCF files (optional). 
\"\"\" if phenopacket_path is not None : create_spiked_vcf ( output_dir , phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , ) elif phenopacket_dir is not None : create_spiked_vcfs ( output_dir , phenopacket_dir , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , )","title":"spike_vcfs"},{"location":"api/pheval/prepare/custom_exceptions/","text":"InputError Bases: Exception Exception raised for missing required inputs. Source code in src/pheval/prepare/custom_exceptions.py 4 5 6 7 8 9 10 11 12 13 class InputError ( Exception ): \"\"\"Exception raised for missing required inputs.\"\"\" def __init__ ( self , file , message = \"Missing required input\" ): self . file : str = file self . message : str = message super () . __init__ ( self . message ) def __str__ ( self ): return f \" { self . message } -> { self . file } \" MutuallyExclusiveOptionError Bases: Option Exception raised for when Source code in src/pheval/prepare/custom_exceptions.py 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 class MutuallyExclusiveOptionError ( Option ): \"\"\"Exception raised for when\"\"\" def __init__ ( self , * args , ** kwargs ): self . mutually_exclusive = set ( kwargs . pop ( \"mutually_exclusive\" , [])) help_ = kwargs . get ( \"help\" , \"\" ) if self . mutually_exclusive : ex_str = \", \" . join ( self . mutually_exclusive ) kwargs [ \"help\" ] = help_ + ( \" NOTE: This argument is mutually exclusive with \" \" arguments: [\" + ex_str + \"].\" ) super ( MutuallyExclusiveOptionError , self ) . __init__ ( * args , ** kwargs ) def handle_parse_result ( self , ctx , opts , args ): if self . mutually_exclusive . intersection ( opts ) and self . name in opts : raise UsageError ( \"Illegal usage: ` {} ` is mutually exclusive with \" \"arguments ` {} `.\" . format ( self . name , \", \" . join ( self . mutually_exclusive )) ) return super ( MutuallyExclusiveOptionError , self ) . 
handle_parse_result ( ctx , opts , args )","title":"Custom exceptions"},{"location":"api/pheval/prepare/custom_exceptions/#src.pheval.prepare.custom_exceptions.InputError","text":"Bases: Exception Exception raised for missing required inputs. Source code in src/pheval/prepare/custom_exceptions.py 4 5 6 7 8 9 10 11 12 13 class InputError ( Exception ): \"\"\"Exception raised for missing required inputs.\"\"\" def __init__ ( self , file , message = \"Missing required input\" ): self . file : str = file self . message : str = message super () . __init__ ( self . message ) def __str__ ( self ): return f \" { self . message } -> { self . file } \"","title":"InputError"},{"location":"api/pheval/prepare/custom_exceptions/#src.pheval.prepare.custom_exceptions.MutuallyExclusiveOptionError","text":"Bases: Option Exception raised for when Source code in src/pheval/prepare/custom_exceptions.py 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 class MutuallyExclusiveOptionError ( Option ): \"\"\"Exception raised for when\"\"\" def __init__ ( self , * args , ** kwargs ): self . mutually_exclusive = set ( kwargs . pop ( \"mutually_exclusive\" , [])) help_ = kwargs . get ( \"help\" , \"\" ) if self . mutually_exclusive : ex_str = \", \" . join ( self . mutually_exclusive ) kwargs [ \"help\" ] = help_ + ( \" NOTE: This argument is mutually exclusive with \" \" arguments: [\" + ex_str + \"].\" ) super ( MutuallyExclusiveOptionError , self ) . __init__ ( * args , ** kwargs ) def handle_parse_result ( self , ctx , opts , args ): if self . mutually_exclusive . intersection ( opts ) and self . name in opts : raise UsageError ( \"Illegal usage: ` {} ` is mutually exclusive with \" \"arguments ` {} `.\" . format ( self . name , \", \" . join ( self . mutually_exclusive )) ) return super ( MutuallyExclusiveOptionError , self ) . 
handle_parse_result ( ctx , opts , args )","title":"MutuallyExclusiveOptionError"},{"location":"api/pheval/prepare/prepare_corpus/","text":"prepare_corpus ( phenopacket_dir , variant_analysis , gene_analysis , disease_analysis , gene_identifier , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , output_dir ) Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Parameters: Name Type Description Default phenopacket_dir Path The path to the directory containing Phenopackets. required variant_analysis bool If True, check for complete variant records in the Phenopackets. required gene_analysis bool If True, check for complete gene records in the Phenopackets. required disease_analysis bool If True, check for complete disease records in the Phenopackets. required gene_identifier str Identifier for updating gene identifiers, if applicable. required hg19_template_vcf Path Path to the hg19 template VCF file (optional), to spike variants into required hg38_template_vcf Path Path to the hg38 template VCF file (optional), to spike variants into required hg19_vcf_dir Path Path to the directory containing hg19 template VCF files (optional). required hg38_vcf_dir Path Path to the directory containing hg38 template VCF files (optional). required output_dir Path The directory to save the prepared Phenopackets and, optionally, VCF files. required Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. 
Source code in src/pheval/prepare/prepare_corpus.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 def prepare_corpus ( phenopacket_dir : Path , variant_analysis : bool , gene_analysis : bool , disease_analysis : bool , gene_identifier : str , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , output_dir : Path , ) -> None : \"\"\" Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Args: phenopacket_dir (Path): The path to the directory containing Phenopackets. variant_analysis (bool): If True, check for complete variant records in the Phenopackets. gene_analysis (bool): If True, check for complete gene records in the Phenopackets. disease_analysis (bool): If True, check for complete disease records in the Phenopackets. gene_identifier (str): Identifier for updating gene identifiers, if applicable. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional), to spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required. hg38_template_vcf (Path): Path to the hg38 template VCF file (optional), to spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required. hg19_vcf_dir (Path): Path to the directory containing hg19 template VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing hg38 template VCF files (optional). output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files. Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. \"\"\" output_dir . joinpath ( \"phenopackets\" ) . 
mkdir ( exist_ok = True , parents = True ) for phenopacket_path in all_files ( phenopacket_dir ): phenopacket_util = PhenopacketUtil ( phenopacket_reader ( phenopacket_path )) if not phenopacket_util . observed_phenotypic_features (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to no observed phenotypic features.\" ) continue if variant_analysis : if phenopacket_util . check_incomplete_variant_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing variant fields.\" ) continue if gene_analysis : if phenopacket_util . check_incomplete_gene_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing gene fields.\" ) continue if disease_analysis : if phenopacket_util . check_incomplete_disease_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing disease fields.\" ) continue if hg19_template_vcf or hg38_template_vcf : output_dir . joinpath ( \"vcf\" ) . mkdir ( exist_ok = True ) create_spiked_vcf ( output_dir . joinpath ( \"vcf\" ), phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , ) if gene_identifier : create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir . joinpath ( \"phenopackets\" ) ) else : # if not updating phenopacket gene identifiers then copy phenopacket as is to output directory shutil . copy ( phenopacket_path , output_dir . joinpath ( f \"phenopackets/ { phenopacket_path . name } \" ) )","title":"Prepare corpus"},{"location":"api/pheval/prepare/prepare_corpus/#src.pheval.prepare.prepare_corpus.prepare_corpus","text":"Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Parameters: Name Type Description Default phenopacket_dir Path The path to the directory containing Phenopackets. 
required variant_analysis bool If True, check for complete variant records in the Phenopackets. required gene_analysis bool If True, check for complete gene records in the Phenopackets. required disease_analysis bool If True, check for complete disease records in the Phenopackets. required gene_identifier str Identifier for updating gene identifiers, if applicable. required hg19_template_vcf Path Path to the hg19 template VCF file (optional), to spike variants into required hg38_template_vcf Path Path to the hg38 template VCF file (optional), to spike variants into required hg19_vcf_dir Path Path to the directory containing hg19 template VCF files (optional). required hg38_vcf_dir Path Path to the directory containing hg38 template VCF files (optional). required output_dir Path The directory to save the prepared Phenopackets and, optionally, VCF files. required Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. Source code in src/pheval/prepare/prepare_corpus.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 def prepare_corpus ( phenopacket_dir : Path , variant_analysis : bool , gene_analysis : bool , disease_analysis : bool , gene_identifier : str , hg19_template_vcf : Path , hg38_template_vcf : Path , hg19_vcf_dir : Path , hg38_vcf_dir : Path , output_dir : Path , ) -> None : \"\"\" Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating gene identifiers. Args: phenopacket_dir (Path): The path to the directory containing Phenopackets. variant_analysis (bool): If True, check for complete variant records in the Phenopackets. gene_analysis (bool): If True, check for complete gene records in the Phenopackets. 
disease_analysis (bool): If True, check for complete disease records in the Phenopackets. gene_identifier (str): Identifier for updating gene identifiers, if applicable. hg19_template_vcf (Path): Path to the hg19 template VCF file (optional), to spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required. hg38_template_vcf (Path): Path to the hg38 template VCF file (optional), to spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf or hg38_template_vcf is required. hg19_vcf_dir (Path): Path to the directory containing hg19 template VCF files (optional). hg38_vcf_dir (Path): Path to the directory containing hg38 template VCF files (optional). output_dir (Path): The directory to save the prepared Phenopackets and, optionally, VCF files. Notes: To spike variants into VCFs for variant-based analysis at least one of hg19_template_vcf, hg38_template_vcf, hg19_vcf_dir or hg38_vcf_dir is required. \"\"\" output_dir . joinpath ( \"phenopackets\" ) . mkdir ( exist_ok = True , parents = True ) for phenopacket_path in all_files ( phenopacket_dir ): phenopacket_util = PhenopacketUtil ( phenopacket_reader ( phenopacket_path )) if not phenopacket_util . observed_phenotypic_features (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to no observed phenotypic features.\" ) continue if variant_analysis : if phenopacket_util . check_incomplete_variant_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing variant fields.\" ) continue if gene_analysis : if phenopacket_util . check_incomplete_gene_record (): info_log . warning ( f \"Removed { phenopacket_path . name } from the corpus due to missing gene fields.\" ) continue if disease_analysis : if phenopacket_util . check_incomplete_disease_record (): info_log . warning ( f \"Removed { phenopacket_path . 
name } from the corpus due to missing disease fields.\" ) continue if hg19_template_vcf or hg38_template_vcf : output_dir . joinpath ( \"vcf\" ) . mkdir ( exist_ok = True ) create_spiked_vcf ( output_dir . joinpath ( \"vcf\" ), phenopacket_path , hg19_template_vcf , hg38_template_vcf , hg19_vcf_dir , hg38_vcf_dir , ) if gene_identifier : create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir . joinpath ( \"phenopackets\" ) ) else : # if not updating phenopacket gene identifiers then copy phenopacket as is to output directory shutil . copy ( phenopacket_path , output_dir . joinpath ( f \"phenopackets/ { phenopacket_path . name } \" ) )","title":"prepare_corpus"},{"location":"api/pheval/prepare/update_phenopacket/","text":"create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir ) Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket. Parameters: Name Type Description Default gene_identifier str Identifier used to update the gene context. required phenopacket_path Path The path to the input Phenopacket file. required output_dir Path The directory where the updated Phenopacket will be written. required Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 def create_updated_phenopacket ( gene_identifier : str , phenopacket_path : Path , output_dir : Path ) -> None : \"\"\" Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket. Args: gene_identifier (str): Identifier used to update the gene context. phenopacket_path (Path): The path to the input Phenopacket file. output_dir (Path): The directory where the updated Phenopacket will be written. 
Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. \"\"\" hgnc_data = create_hgnc_dict () updated_phenopacket = update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) write_phenopacket ( updated_phenopacket , output_dir . joinpath ( phenopacket_path . name )) create_updated_phenopackets ( gene_identifier , phenopacket_dir , output_dir ) Update the gene context within the interpretations for a directory of Phenopackets and writes the updated Phenopackets. Parameters: Name Type Description Default gene_identifier str Identifier used to update the gene context. required phenopacket_dir Path The path to the input Phenopacket directory. required output_dir Path The directory where the updated Phenopackets will be written. required Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 def create_updated_phenopackets ( gene_identifier : str , phenopacket_dir : Path , output_dir : Path ) -> None : \"\"\" Update the gene context within the interpretations for a directory of Phenopackets and writes the updated Phenopackets. Args: gene_identifier (str): Identifier used to update the gene context. phenopacket_dir (Path): The path to the input Phenopacket directory. output_dir (Path): The directory where the updated Phenopackets will be written. Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. 
\"\"\" hgnc_data = create_hgnc_dict () for phenopacket_path in all_files ( phenopacket_dir ): updated_phenopacket = update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) write_phenopacket ( updated_phenopacket , output_dir . joinpath ( phenopacket_path . name )) update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) Update the gene context of the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path The path to the Phenopacket file. required gene_identifier str Identifier to update the gene context. required hgnc_data defaultdict The HGNC data used for updating. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The updated Phenopacket or Family. Notes: This function updates the gene context within the Phenopacket or Family instance. The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 def update_outdated_gene_context ( phenopacket_path : Path , gene_identifier : str , hgnc_data : defaultdict ) -> Union [ Phenopacket , Family ]: \"\"\" Update the gene context of the Phenopacket. Args: phenopacket_path (Path): The path to the Phenopacket file. gene_identifier (str): Identifier to update the gene context. hgnc_data (defaultdict): The HGNC data used for updating. Returns: Union[Phenopacket, Family]: The updated Phenopacket or Family. Notes: This function updates the gene context within the Phenopacket or Family instance. The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. 
\"\"\" phenopacket = phenopacket_reader ( phenopacket_path ) interpretations = PhenopacketUtil ( phenopacket ) . interpretations () updated_interpretations = GeneIdentifierUpdater ( hgnc_data = hgnc_data , gene_identifier = gene_identifier ) . update_genomic_interpretations_gene_identifier ( interpretations , phenopacket_path ) return PhenopacketRebuilder ( phenopacket ) . update_interpretations ( updated_interpretations ) update_phenopackets ( gene_identifier , phenopacket_path , phenopacket_dir , output_dir ) Update the gene identifiers in either a single phenopacket or a directory of phenopackets. Parameters: Name Type Description Default gene_identifier str The gene identifier to be updated. required phenopacket_path Path The path to a single Phenopacket file. required phenopacket_dir Path The directory containing multiple Phenopacket files. required output_dir Path The output directory to save the updated Phenopacket files. required Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 def update_phenopackets ( gene_identifier : str , phenopacket_path : Path , phenopacket_dir : Path , output_dir : Path ) -> None : \"\"\" Update the gene identifiers in either a single phenopacket or a directory of phenopackets. Args: gene_identifier (str): The gene identifier to be updated. phenopacket_path (Path): The path to a single Phenopacket file. phenopacket_dir (Path): The directory containing multiple Phenopacket files. output_dir (Path): The output directory to save the updated Phenopacket files. Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. 
We recommend using the ENSEMBL namespace to describe the gene identifiers. \"\"\" output_dir . mkdir ( exist_ok = True ) if phenopacket_path is not None : create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir ) elif phenopacket_dir is not None : create_updated_phenopackets ( gene_identifier , phenopacket_dir , output_dir )","title":"Update phenopacket"},{"location":"api/pheval/prepare/update_phenopacket/#src.pheval.prepare.update_phenopacket.create_updated_phenopacket","text":"Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket. Parameters: Name Type Description Default gene_identifier str Identifier used to update the gene context. required phenopacket_path Path The path to the input Phenopacket file. required output_dir Path The directory where the updated Phenopacket will be written. required Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 def create_updated_phenopacket ( gene_identifier : str , phenopacket_path : Path , output_dir : Path ) -> None : \"\"\" Update the gene context within the interpretations for a Phenopacket and writes the updated Phenopacket. Args: gene_identifier (str): Identifier used to update the gene context. phenopacket_path (Path): The path to the input Phenopacket file. output_dir (Path): The directory where the updated Phenopacket will be written. Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. 
\"\"\" hgnc_data = create_hgnc_dict () updated_phenopacket = update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) write_phenopacket ( updated_phenopacket , output_dir . joinpath ( phenopacket_path . name ))","title":"create_updated_phenopacket"},{"location":"api/pheval/prepare/update_phenopacket/#src.pheval.prepare.update_phenopacket.create_updated_phenopackets","text":"Update the gene context within the interpretations for a directory of Phenopackets and writes the updated Phenopackets. Parameters: Name Type Description Default gene_identifier str Identifier used to update the gene context. required phenopacket_dir Path The path to the input Phenopacket directory. required output_dir Path The directory where the updated Phenopackets will be written. required Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 def create_updated_phenopackets ( gene_identifier : str , phenopacket_dir : Path , output_dir : Path ) -> None : \"\"\" Update the gene context within the interpretations for a directory of Phenopackets and writes the updated Phenopackets. Args: gene_identifier (str): Identifier used to update the gene context. phenopacket_dir (Path): The path to the input Phenopacket directory. output_dir (Path): The directory where the updated Phenopackets will be written. Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. 
\"\"\" hgnc_data = create_hgnc_dict () for phenopacket_path in all_files ( phenopacket_dir ): updated_phenopacket = update_outdated_gene_context ( phenopacket_path , gene_identifier , hgnc_data ) write_phenopacket ( updated_phenopacket , output_dir . joinpath ( phenopacket_path . name ))","title":"create_updated_phenopackets"},{"location":"api/pheval/prepare/update_phenopacket/#src.pheval.prepare.update_phenopacket.update_outdated_gene_context","text":"Update the gene context of the Phenopacket. Parameters: Name Type Description Default phenopacket_path Path The path to the Phenopacket file. required gene_identifier str Identifier to update the gene context. required hgnc_data defaultdict The HGNC data used for updating. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The updated Phenopacket or Family. Notes: This function updates the gene context within the Phenopacket or Family instance. The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 def update_outdated_gene_context ( phenopacket_path : Path , gene_identifier : str , hgnc_data : defaultdict ) -> Union [ Phenopacket , Family ]: \"\"\" Update the gene context of the Phenopacket. Args: phenopacket_path (Path): The path to the Phenopacket file. gene_identifier (str): Identifier to update the gene context. hgnc_data (defaultdict): The HGNC data used for updating. Returns: Union[Phenopacket, Family]: The updated Phenopacket or Family. Notes: This function updates the gene context within the Phenopacket or Family instance. The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. 
We recommend using the ENSEMBL namespace to describe the gene identifiers. \"\"\" phenopacket = phenopacket_reader ( phenopacket_path ) interpretations = PhenopacketUtil ( phenopacket ) . interpretations () updated_interpretations = GeneIdentifierUpdater ( hgnc_data = hgnc_data , gene_identifier = gene_identifier ) . update_genomic_interpretations_gene_identifier ( interpretations , phenopacket_path ) return PhenopacketRebuilder ( phenopacket ) . update_interpretations ( updated_interpretations )","title":"update_outdated_gene_context"},{"location":"api/pheval/prepare/update_phenopacket/#src.pheval.prepare.update_phenopacket.update_phenopackets","text":"Update the gene identifiers in either a single phenopacket or a directory of phenopackets. Parameters: Name Type Description Default gene_identifier str The gene identifier to be updated. required phenopacket_path Path The path to a single Phenopacket file. required phenopacket_dir Path The directory containing multiple Phenopacket files. required output_dir Path The output directory to save the updated Phenopacket files. required Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. Source code in src/pheval/prepare/update_phenopacket.py 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 def update_phenopackets ( gene_identifier : str , phenopacket_path : Path , phenopacket_dir : Path , output_dir : Path ) -> None : \"\"\" Update the gene identifiers in either a single phenopacket or a directory of phenopackets. Args: gene_identifier (str): The gene identifier to be updated. phenopacket_path (Path): The path to a single Phenopacket file. phenopacket_dir (Path): The directory containing multiple Phenopacket files. output_dir (Path): The output directory to save the updated Phenopacket files. 
Notes: The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace to describe the gene identifiers. \"\"\" output_dir . mkdir ( exist_ok = True ) if phenopacket_path is not None : create_updated_phenopacket ( gene_identifier , phenopacket_path , output_dir ) elif phenopacket_dir is not None : create_updated_phenopackets ( gene_identifier , phenopacket_dir , output_dir )","title":"update_phenopackets"},{"location":"api/pheval/runners/runner/","text":"Runners Module DefaultPhEvalRunner Bases: PhEvalRunner DefaultPhEvalRunner Parameters: Name Type Description Default PhEvalRunner PhEvalRunner Abstract PhEvalRunnerClass required Source code in src/pheval/runners/runner.py 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 class DefaultPhEvalRunner ( PhEvalRunner ): \"\"\"DefaultPhEvalRunner Args: PhEvalRunner (PhEvalRunner): Abstract PhEvalRunnerClass \"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): print ( \"preparing\" ) def run ( self ): print ( \"running\" ) def post_process ( self ): print ( \"post processing\" ) PhEvalRunner dataclass Bases: ABC PhEvalRunner Class Source code in src/pheval/runners/runner.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 @dataclass class PhEvalRunner ( ABC ): \"\"\"PhEvalRunner Class\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str directory_path = None input_dir_config = None _meta_data = None 
__raw_results_dir = \"raw_results/\" __pheval_gene_results_dir = \"pheval_gene_results/\" __pheval_variant_results_dir = \"pheval_variant_results/\" __pheval_disease_results_dir = \"pheval_disease_results/\" __tool_input_commands_dir = \"tool_input_commands/\" __run_meta_data_file = \"results.yml\" def __post_init__ ( self ): self . input_dir_config = parse_input_dir_config ( self . input_dir ) def _get_tool ( self ): return self . input_dir_config . tool def _get_variant_analysis ( self ): return self . input_dir_config . variant_analysis def _get_gene_analysis ( self ): return self . input_dir_config . gene_analysis def _get_disease_analysis ( self ): return self . input_dir_config . disease_analysis @property def tool_input_commands_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __tool_input_commands_dir ) @tool_input_commands_dir . setter def tool_input_commands_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def raw_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __raw_results_dir ) @raw_results_dir . setter def raw_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_gene_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_gene_results_dir ) @pheval_gene_results_dir . setter def pheval_gene_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_variant_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_variant_results_dir ) @pheval_variant_results_dir . setter def pheval_variant_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_disease_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_disease_results_dir ) @pheval_disease_results_dir . setter def pheval_disease_results_dir ( self , directory_path ): self . 
directory_path = Path ( directory_path ) def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True ) @property def meta_data ( self ): self . _meta_data = BasicOutputRunMetaData ( tool = self . input_dir_config . tool , tool_version = self . version , config = f \" { Path ( self . input_dir ) . parent . name } / { Path ( self . input_dir ) . name } \" , run_timestamp = datetime . now () . timestamp (), corpus = f \" { Path ( self . testdata_dir ) . parent . name } / { Path ( self . testdata_dir ) . name } \" , ) return self . _meta_data @meta_data . setter def meta_data ( self , meta_data ): self . _meta_data = meta_data @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" @abstractmethod def run ( self ): \"\"\"run\"\"\" @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data build_output_directory_structure () build output directory structure Source code in src/pheval/runners/runner.py 87 88 89 90 91 92 93 94 95 96 def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . 
mkdir ( exist_ok = True ) construct_meta_data () Construct run output meta data Source code in src/pheval/runners/runner.py 125 126 127 def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data post_process () abstractmethod post_process Source code in src/pheval/runners/runner.py 121 122 123 @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" prepare () abstractmethod prepare Source code in src/pheval/runners/runner.py 113 114 115 @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" run () abstractmethod run Source code in src/pheval/runners/runner.py 117 118 119 @abstractmethod def run ( self ): \"\"\"run\"\"\"","title":"Runner"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.DefaultPhEvalRunner","text":"Bases: PhEvalRunner DefaultPhEvalRunner Parameters: Name Type Description Default PhEvalRunner PhEvalRunner Abstract PhEvalRunnerClass required Source code in src/pheval/runners/runner.py 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 class DefaultPhEvalRunner ( PhEvalRunner ): \"\"\"DefaultPhEvalRunner Args: PhEvalRunner (PhEvalRunner): Abstract PhEvalRunnerClass \"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str def prepare ( self ): print ( \"preparing\" ) def run ( self ): print ( \"running\" ) def post_process ( self ): print ( \"post processing\" )","title":"DefaultPhEvalRunner"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner","text":"Bases: ABC PhEvalRunner Class Source code in src/pheval/runners/runner.py 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 
118 119 120 121 122 123 124 125 126 127 @dataclass class PhEvalRunner ( ABC ): \"\"\"PhEvalRunner Class\"\"\" input_dir : Path testdata_dir : Path tmp_dir : Path output_dir : Path config_file : Path version : str directory_path = None input_dir_config = None _meta_data = None __raw_results_dir = \"raw_results/\" __pheval_gene_results_dir = \"pheval_gene_results/\" __pheval_variant_results_dir = \"pheval_variant_results/\" __pheval_disease_results_dir = \"pheval_disease_results/\" __tool_input_commands_dir = \"tool_input_commands/\" __run_meta_data_file = \"results.yml\" def __post_init__ ( self ): self . input_dir_config = parse_input_dir_config ( self . input_dir ) def _get_tool ( self ): return self . input_dir_config . tool def _get_variant_analysis ( self ): return self . input_dir_config . variant_analysis def _get_gene_analysis ( self ): return self . input_dir_config . gene_analysis def _get_disease_analysis ( self ): return self . input_dir_config . disease_analysis @property def tool_input_commands_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __tool_input_commands_dir ) @tool_input_commands_dir . setter def tool_input_commands_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def raw_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __raw_results_dir ) @raw_results_dir . setter def raw_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_gene_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_gene_results_dir ) @pheval_gene_results_dir . setter def pheval_gene_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) @property def pheval_variant_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_variant_results_dir ) @pheval_variant_results_dir . setter def pheval_variant_results_dir ( self , directory_path ): self . 
directory_path = Path ( directory_path ) @property def pheval_disease_results_dir ( self ): return Path ( self . output_dir ) . joinpath ( self . __pheval_disease_results_dir ) @pheval_disease_results_dir . setter def pheval_disease_results_dir ( self , directory_path ): self . directory_path = Path ( directory_path ) def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True ) @property def meta_data ( self ): self . _meta_data = BasicOutputRunMetaData ( tool = self . input_dir_config . tool , tool_version = self . version , config = f \" { Path ( self . input_dir ) . parent . name } / { Path ( self . input_dir ) . name } \" , run_timestamp = datetime . now () . timestamp (), corpus = f \" { Path ( self . testdata_dir ) . parent . name } / { Path ( self . testdata_dir ) . name } \" , ) return self . _meta_data @meta_data . setter def meta_data ( self , meta_data ): self . _meta_data = meta_data @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\" @abstractmethod def run ( self ): \"\"\"run\"\"\" @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\" def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data","title":"PhEvalRunner"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.build_output_directory_structure","text":"build output directory structure Source code in src/pheval/runners/runner.py 87 88 89 90 91 92 93 94 95 96 def build_output_directory_structure ( self ): \"\"\"build output directory structure\"\"\" self . 
tool_input_commands_dir . mkdir ( exist_ok = True ) self . raw_results_dir . mkdir ( exist_ok = True ) if self . _get_variant_analysis (): self . pheval_variant_results_dir . mkdir ( exist_ok = True ) if self . _get_gene_analysis (): self . pheval_gene_results_dir . mkdir ( exist_ok = True ) if self . _get_disease_analysis (): self . pheval_disease_results_dir . mkdir ( exist_ok = True )","title":"build_output_directory_structure"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.construct_meta_data","text":"Construct run output meta data Source code in src/pheval/runners/runner.py 125 126 127 def construct_meta_data ( self ): \"\"\"Construct run output meta data\"\"\" return self . meta_data","title":"construct_meta_data"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.post_process","text":"post_process Source code in src/pheval/runners/runner.py 121 122 123 @abstractmethod def post_process ( self ): \"\"\"post_process\"\"\"","title":"post_process"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.prepare","text":"prepare Source code in src/pheval/runners/runner.py 113 114 115 @abstractmethod def prepare ( self ) -> str : \"\"\"prepare\"\"\"","title":"prepare"},{"location":"api/pheval/runners/runner/#src.pheval.runners.runner.PhEvalRunner.run","text":"run Source code in src/pheval/runners/runner.py 117 118 119 @abstractmethod def run ( self ): \"\"\"run\"\"\"","title":"run"},{"location":"api/pheval/utils/exomiser/","text":"semsim_to_exomiserdb ( input_path , object_prefix , subject_prefix , db_path ) ingests semsim file into exomiser phenotypic database Parameters: Name Type Description Default input_path Path semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv required object_prefix str object prefix. e.g. MP required subject_prefix str subject prefix e.g HP required db_path Path Exomiser Phenotypic Database Folder Path. (e.g. 
/exomiser_folder/2209_phenotype/2209_phenotype/) required Source code in src/pheval/utils/exomiser.py 6 7 8 9 10 11 12 13 14 15 16 def semsim_to_exomiserdb ( input_path : Path , object_prefix : str , subject_prefix : str , db_path : Path ): \"\"\"ingests semsim file into exomiser phenotypic database Args: input_path (Path): semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv object_prefix (str): object prefix. e.g. MP subject_prefix (str): subject prefix e.g HP db_path (Path): Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/) \"\"\" exomiserdb = ExomiserDB ( db_path ) exomiserdb . import_from_semsim_file ( input_path , object_prefix , subject_prefix )","title":"Exomiser"},{"location":"api/pheval/utils/exomiser/#src.pheval.utils.exomiser.semsim_to_exomiserdb","text":"ingests semsim file into exomiser phenotypic database Parameters: Name Type Description Default input_path Path semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv required object_prefix str object prefix. e.g. MP required subject_prefix str subject prefix e.g HP required db_path Path Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/) required Source code in src/pheval/utils/exomiser.py 6 7 8 9 10 11 12 13 14 15 16 def semsim_to_exomiserdb ( input_path : Path , object_prefix : str , subject_prefix : str , db_path : Path ): \"\"\"ingests semsim file into exomiser phenotypic database Args: input_path (Path): semsim input file. e.g phenio-plus-hp-mp.0.semsimian.tsv object_prefix (str): object prefix. e.g. MP subject_prefix (str): subject prefix e.g HP db_path (Path): Exomiser Phenotypic Database Folder Path. (e.g. /exomiser_folder/2209_phenotype/2209_phenotype/) \"\"\" exomiserdb = ExomiserDB ( db_path ) exomiserdb . 
import_from_semsim_file ( input_path , object_prefix , subject_prefix )","title":"semsim_to_exomiserdb"},{"location":"api/pheval/utils/file_utils/","text":"all_files ( directory ) Obtains all files from a given directory. Parameters: Name Type Description Default directory Path The directory path. required Returns: Type Description list [ Path ] list[Path]: A list of Path objects representing all files in the directory. Source code in src/pheval/utils/file_utils.py 31 32 33 34 35 36 37 38 39 40 41 42 43 def all_files ( directory : Path ) -> list [ Path ]: \"\"\" Obtains all files from a given directory. Args: directory (Path): The directory path. Returns: list[Path]: A list of Path objects representing all files in the directory. \"\"\" files = [ file_path for file_path in directory . iterdir ()] files . sort () return files ensure_columns_exists ( cols , dataframes , err_message = '' ) Ensures the columns exist in dataframes passed as argument (e.g) \" ensure_columns_exists( cols=['column_a', 'column_b, 'column_c'], err_message=\"Custom error message if any column doesn't exist in any dataframe passed as argument\", dataframes=[data_frame1, data_frame2], ) \" Source code in src/pheval/utils/file_utils.py 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 def ensure_columns_exists ( cols : list , dataframes : List [ pd . DataFrame ], err_message : str = \"\" ): \"\"\"Ensures the columns exist in dataframes passed as argument (e.g) \" ensure_columns_exists( cols=['column_a', 'column_b, 'column_c'], err_message=\"Custom error message if any column doesn't exist in any dataframe passed as argument\", dataframes=[data_frame1, data_frame2], ) \" \"\"\" flat_cols = list ( itertools . chain ( cols )) if not dataframes or not flat_cols : return if err_message : err_msg = f \"\"\"columns: { \", \" . join ( flat_cols [: - 1 ]) } and { flat_cols [ - 1 ] } { err_message } \"\"\" else : err_msg = f \"\"\"columns: { \", \" . 
join ( flat_cols [: - 1 ]) } and { flat_cols [ - 1 ] } \\ - must be present in both left and right files\"\"\" for dataframe in dataframes : if not all ( x in dataframe . columns for x in flat_cols ): raise ValueError ( err_msg ) ensure_file_exists ( * files ) Ensures the existence of files passed as parameter Raises: FileNotFoundError: If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised Source code in src/pheval/utils/file_utils.py 73 74 75 76 77 78 79 80 def ensure_file_exists ( * files : str ): \"\"\"Ensures the existence of files passed as parameter Raises: FileNotFoundError: If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised \"\"\" for file in files : if not path . isfile ( file ): raise FileNotFoundError ( f \"File { file } not found\" ) files_with_suffix ( directory , suffix ) Obtains all files ending in a specified suffix from a given directory. Parameters: Name Type Description Default directory Path The directory path. required suffix str The specified suffix to filter files. required Returns: Type Description list [ Path ] list[Path]: A list of Path objects representing files with the specified suffix. Source code in src/pheval/utils/file_utils.py 15 16 17 18 19 20 21 22 23 24 25 26 27 28 def files_with_suffix ( directory : Path , suffix : str ) -> list [ Path ]: \"\"\" Obtains all files ending in a specified suffix from a given directory. Args: directory (Path): The directory path. suffix (str): The specified suffix to filter files. Returns: list[Path]: A list of Path objects representing files with the specified suffix. \"\"\" files = [ file_path for file_path in directory . iterdir () if file_path . suffix == suffix ] files . sort () return files is_gzipped ( file_path ) Confirms whether a file is gzipped. Parameters: Name Type Description Default file_path Path The path to the file. required Returns: Name Type Description bool bool True if the file is gzipped, False otherwise. 
Source code in src/pheval/utils/file_utils.py 46 47 48 49 50 51 52 53 54 55 56 def is_gzipped ( file_path : Path ) -> bool : \"\"\" Confirms whether a file is gzipped. Args: file_path (Path): The path to the file. Returns: bool: True if the file is gzipped, False otherwise. \"\"\" return file_path . name . endswith ( \".gz\" ) normalise_file_name ( file_path ) Normalises the file name by removing diacritical marks (accents) from Unicode characters. Parameters: Name Type Description Default file_path Path The path to the file. required Returns: Name Type Description str str The normalised file name without diacritical marks. Source code in src/pheval/utils/file_utils.py 59 60 61 62 63 64 65 66 67 68 69 70 def normalise_file_name ( file_path : Path ) -> str : \"\"\" Normalises the file name by removing diacritical marks (accents) from Unicode characters. Args: file_path (Path): The path to the file. Returns: str: The normalised file name without diacritical marks. \"\"\" normalised_file_name = unicodedata . normalize ( \"NFD\" , str ( file_path )) return re . sub ( \"[ \\u0300 - \\u036f ]\" , \"\" , normalised_file_name ) write_metadata ( output_dir , meta_data ) Write the metadata for a run to a YAML file. Parameters: Name Type Description Default output_dir Path The directory where the metadata file will be saved. required meta_data BasicOutputRunMetaData The metadata to be written. required Source code in src/pheval/utils/file_utils.py 108 109 110 111 112 113 114 115 116 117 118 def write_metadata ( output_dir : Path , meta_data : BasicOutputRunMetaData ) -> None : \"\"\" Write the metadata for a run to a YAML file. Args: output_dir (Path): The directory where the metadata file will be saved. meta_data (BasicOutputRunMetaData): The metadata to be written. \"\"\" with open ( Path ( output_dir ) . joinpath ( \"results.yml\" ), \"w\" ) as metadata_file : yaml . dump ( to_dict ( meta_data ), metadata_file , sort_keys = False , default_style = \"\" ) metadata_file . 
close ()","title":"File utils"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.all_files","text":"Obtains all files from a given directory. Parameters: Name Type Description Default directory Path The directory path. required Returns: Type Description list [ Path ] list[Path]: A list of Path objects representing all files in the directory. Source code in src/pheval/utils/file_utils.py 31 32 33 34 35 36 37 38 39 40 41 42 43 def all_files ( directory : Path ) -> list [ Path ]: \"\"\" Obtains all files from a given directory. Args: directory (Path): The directory path. Returns: list[Path]: A list of Path objects representing all files in the directory. \"\"\" files = [ file_path for file_path in directory . iterdir ()] files . sort () return files","title":"all_files"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.ensure_columns_exists","text":"Ensures the columns exist in dataframes passed as argument (e.g) \" ensure_columns_exists( cols=['column_a', 'column_b, 'column_c'], err_message=\"Custom error message if any column doesn't exist in any dataframe passed as argument\", dataframes=[data_frame1, data_frame2], ) \" Source code in src/pheval/utils/file_utils.py 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 def ensure_columns_exists ( cols : list , dataframes : List [ pd . DataFrame ], err_message : str = \"\" ): \"\"\"Ensures the columns exist in dataframes passed as argument (e.g) \" ensure_columns_exists( cols=['column_a', 'column_b, 'column_c'], err_message=\"Custom error message if any column doesn't exist in any dataframe passed as argument\", dataframes=[data_frame1, data_frame2], ) \" \"\"\" flat_cols = list ( itertools . chain ( cols )) if not dataframes or not flat_cols : return if err_message : err_msg = f \"\"\"columns: { \", \" . join ( flat_cols [: - 1 ]) } and { flat_cols [ - 1 ] } { err_message } \"\"\" else : err_msg = f \"\"\"columns: { \", \" . 
join ( flat_cols [: - 1 ]) } and { flat_cols [ - 1 ] } \\ - must be present in both left and right files\"\"\" for dataframe in dataframes : if not all ( x in dataframe . columns for x in flat_cols ): raise ValueError ( err_msg )","title":"ensure_columns_exists"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.ensure_file_exists","text":"Ensures the existence of files passed as parameter Raises: FileNotFoundError: If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised Source code in src/pheval/utils/file_utils.py 73 74 75 76 77 78 79 80 def ensure_file_exists ( * files : str ): \"\"\"Ensures the existence of files passed as parameter Raises: FileNotFoundError: If any file passed as a parameter doesn't exist a FileNotFound Exception will be raised \"\"\" for file in files : if not path . isfile ( file ): raise FileNotFoundError ( f \"File { file } not found\" )","title":"ensure_file_exists"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.files_with_suffix","text":"Obtains all files ending in a specified suffix from a given directory. Parameters: Name Type Description Default directory Path The directory path. required suffix str The specified suffix to filter files. required Returns: Type Description list [ Path ] list[Path]: A list of Path objects representing files with the specified suffix. Source code in src/pheval/utils/file_utils.py 15 16 17 18 19 20 21 22 23 24 25 26 27 28 def files_with_suffix ( directory : Path , suffix : str ) -> list [ Path ]: \"\"\" Obtains all files ending in a specified suffix from a given directory. Args: directory (Path): The directory path. suffix (str): The specified suffix to filter files. Returns: list[Path]: A list of Path objects representing files with the specified suffix. \"\"\" files = [ file_path for file_path in directory . iterdir () if file_path . suffix == suffix ] files . 
sort () return files","title":"files_with_suffix"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.is_gzipped","text":"Confirms whether a file is gzipped. Parameters: Name Type Description Default file_path Path The path to the file. required Returns: Name Type Description bool bool True if the file is gzipped, False otherwise. Source code in src/pheval/utils/file_utils.py 46 47 48 49 50 51 52 53 54 55 56 def is_gzipped ( file_path : Path ) -> bool : \"\"\" Confirms whether a file is gzipped. Args: file_path (Path): The path to the file. Returns: bool: True if the file is gzipped, False otherwise. \"\"\" return file_path . name . endswith ( \".gz\" )","title":"is_gzipped"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.normalise_file_name","text":"Normalises the file name by removing diacritical marks (accents) from Unicode characters. Parameters: Name Type Description Default file_path Path The path to the file. required Returns: Name Type Description str str The normalised file name without diacritical marks. Source code in src/pheval/utils/file_utils.py 59 60 61 62 63 64 65 66 67 68 69 70 def normalise_file_name ( file_path : Path ) -> str : \"\"\" Normalises the file name by removing diacritical marks (accents) from Unicode characters. Args: file_path (Path): The path to the file. Returns: str: The normalised file name without diacritical marks. \"\"\" normalised_file_name = unicodedata . normalize ( \"NFD\" , str ( file_path )) return re . sub ( \"[ \\u0300 - \\u036f ]\" , \"\" , normalised_file_name )","title":"normalise_file_name"},{"location":"api/pheval/utils/file_utils/#src.pheval.utils.file_utils.write_metadata","text":"Write the metadata for a run to a YAML file. Parameters: Name Type Description Default output_dir Path The directory where the metadata file will be saved. required meta_data BasicOutputRunMetaData The metadata to be written. 
required Source code in src/pheval/utils/file_utils.py 108 109 110 111 112 113 114 115 116 117 118 def write_metadata ( output_dir : Path , meta_data : BasicOutputRunMetaData ) -> None : \"\"\" Write the metadata for a run to a YAML file. Args: output_dir (Path): The directory where the metadata file will be saved. meta_data (BasicOutputRunMetaData): The metadata to be written. \"\"\" with open ( Path ( output_dir ) . joinpath ( \"results.yml\" ), \"w\" ) as metadata_file : yaml . dump ( to_dict ( meta_data ), metadata_file , sort_keys = False , default_style = \"\" ) metadata_file . close ()","title":"write_metadata"},{"location":"api/pheval/utils/phenopacket_utils/","text":"GeneIdentifierUpdater Class for updating gene identifiers within genomic interpretations. Source code in src/pheval/utils/phenopacket_utils.py 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 class GeneIdentifierUpdater : \"\"\"Class for updating gene identifiers within genomic interpretations.\"\"\" def __init__ ( self , gene_identifier : str , hgnc_data : dict = None , identifier_map : dict = None ): \"\"\" Initialise the GeneIdentifierUpdater. Args: gene_identifier (str): The gene identifier to update to. hgnc_data (dict): A dictionary containing HGNC data (default: None). identifier_map (dict): A dictionary mapping gene identifiers (default: None). \"\"\" self . hgnc_data = hgnc_data self . gene_identifier = gene_identifier self . identifier_map = identifier_map def find_identifier ( self , gene_symbol : str ) -> str : \"\"\" Find the specified gene identifier for a gene symbol. 
Args: gene_symbol (str): The gene symbol to find the identifier for. Returns: str: The identified gene identifier. \"\"\" if gene_symbol in self . hgnc_data . keys (): return self . hgnc_data [ gene_symbol ][ self . gene_identifier ] else : for _symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return data [ self . gene_identifier ] def obtain_gene_symbol_from_identifier ( self , query_gene_identifier : str ) -> str : \"\"\" Obtain gene symbol from a gene identifier. Args: query_gene_identifier (str): The gene identifier. Returns: str: The gene symbol corresponding to the identifier. \"\"\" return self . identifier_map [ query_gene_identifier ] def _find_alternate_ids ( self , gene_symbol : str ) -> List [ str ]: \"\"\" Find the alternate IDs for a gene symbol. Args: gene_symbol (str): The gene symbol to find alternate IDs for. Returns: List[str]: List of alternate IDs for the gene symbol. \"\"\" if gene_symbol in self . hgnc_data . keys (): return [ self . hgnc_data [ gene_symbol ][ \"hgnc_id\" ], \"ncbigene:\" + self . hgnc_data [ gene_symbol ][ \"entrez_id\" ], \"ensembl:\" + self . hgnc_data [ gene_symbol ][ \"ensembl_id\" ], \"symbol:\" + gene_symbol , ] else : for symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return [ data [ \"hgnc_id\" ], \"ncbigene:\" + data [ \"entrez_id\" ], \"ensembl:\" + data [ \"ensembl_id\" ], \"symbol:\" + symbol , ] def update_genomic_interpretations_gene_identifier ( self , interpretations : List [ Interpretation ], phenopacket_path : Path ) -> List [ Interpretation ]: \"\"\" Update the genomic interpretations of a Phenopacket. Args: interpretations (List[Interpretation]): List of Interpretation objects. Returns: List[Interpretation]: Updated list of Interpretation objects. 
\"\"\" updated_interpretations = copy ( list ( interpretations )) for updated_interpretation in updated_interpretations : for g in updated_interpretation . diagnosis . genomic_interpretations : updated_gene_identifier = self . find_identifier ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) info_log . info ( f \"Updating gene identifier in { phenopacket_path } from \" f \" { g . variant_interpretation . variation_descriptor . gene_context . value_id } \" f \"to { updated_gene_identifier } \" ) g . variant_interpretation . variation_descriptor . gene_context . value_id = ( updated_gene_identifier ) del g . variant_interpretation . variation_descriptor . gene_context . alternate_ids [:] g . variant_interpretation . variation_descriptor . gene_context . alternate_ids . extend ( self . _find_alternate_ids ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) ) return updated_interpretations __init__ ( gene_identifier , hgnc_data = None , identifier_map = None ) Initialise the GeneIdentifierUpdater. Parameters: Name Type Description Default gene_identifier str The gene identifier to update to. required hgnc_data dict A dictionary containing HGNC data (default: None). None identifier_map dict A dictionary mapping gene identifiers (default: None). None Source code in src/pheval/utils/phenopacket_utils.py 641 642 643 644 645 646 647 648 649 650 651 652 653 def __init__ ( self , gene_identifier : str , hgnc_data : dict = None , identifier_map : dict = None ): \"\"\" Initialise the GeneIdentifierUpdater. Args: gene_identifier (str): The gene identifier to update to. hgnc_data (dict): A dictionary containing HGNC data (default: None). identifier_map (dict): A dictionary mapping gene identifiers (default: None). \"\"\" self . hgnc_data = hgnc_data self . gene_identifier = gene_identifier self . identifier_map = identifier_map find_identifier ( gene_symbol ) Find the specified gene identifier for a gene symbol. 
Parameters: Name Type Description Default gene_symbol str The gene symbol to find the identifier for. required Returns: Name Type Description str str The identified gene identifier. Source code in src/pheval/utils/phenopacket_utils.py 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 def find_identifier ( self , gene_symbol : str ) -> str : \"\"\" Find the specified gene identifier for a gene symbol. Args: gene_symbol (str): The gene symbol to find the identifier for. Returns: str: The identified gene identifier. \"\"\" if gene_symbol in self . hgnc_data . keys (): return self . hgnc_data [ gene_symbol ][ self . gene_identifier ] else : for _symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return data [ self . gene_identifier ] obtain_gene_symbol_from_identifier ( query_gene_identifier ) Obtain gene symbol from a gene identifier. Parameters: Name Type Description Default query_gene_identifier str The gene identifier. required Returns: Name Type Description str str The gene symbol corresponding to the identifier. Source code in src/pheval/utils/phenopacket_utils.py 673 674 675 676 677 678 679 680 681 682 683 def obtain_gene_symbol_from_identifier ( self , query_gene_identifier : str ) -> str : \"\"\" Obtain gene symbol from a gene identifier. Args: query_gene_identifier (str): The gene identifier. Returns: str: The gene symbol corresponding to the identifier. \"\"\" return self . identifier_map [ query_gene_identifier ] update_genomic_interpretations_gene_identifier ( interpretations , phenopacket_path ) Update the genomic interpretations of a Phenopacket. Parameters: Name Type Description Default interpretations List [ Interpretation ] List of Interpretation objects. required Returns: Type Description List [ Interpretation ] List[Interpretation]: Updated list of Interpretation objects. 
Source code in src/pheval/utils/phenopacket_utils.py 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 def update_genomic_interpretations_gene_identifier ( self , interpretations : List [ Interpretation ], phenopacket_path : Path ) -> List [ Interpretation ]: \"\"\" Update the genomic interpretations of a Phenopacket. Args: interpretations (List[Interpretation]): List of Interpretation objects. Returns: List[Interpretation]: Updated list of Interpretation objects. \"\"\" updated_interpretations = copy ( list ( interpretations )) for updated_interpretation in updated_interpretations : for g in updated_interpretation . diagnosis . genomic_interpretations : updated_gene_identifier = self . find_identifier ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) info_log . info ( f \"Updating gene identifier in { phenopacket_path } from \" f \" { g . variant_interpretation . variation_descriptor . gene_context . value_id } \" f \"to { updated_gene_identifier } \" ) g . variant_interpretation . variation_descriptor . gene_context . value_id = ( updated_gene_identifier ) del g . variant_interpretation . variation_descriptor . gene_context . alternate_ids [:] g . variant_interpretation . variation_descriptor . gene_context . alternate_ids . extend ( self . _find_alternate_ids ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) ) return updated_interpretations GenomicVariant dataclass Represents a genomic variant. Parameters: Name Type Description Default chrom str The chromosome position of the variant recommended to be provided in the following format. required pos int Position of the variant following VCF convention. required ref str Reference allele following VCF convention. required alt str Alternate allele following VCF convention. 
required Source code in src/pheval/utils/phenopacket_utils.py 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 @dataclass class GenomicVariant : \"\"\" Represents a genomic variant. Args: chrom (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. pos (int): Position of the variant following VCF convention. ref (str): Reference allele following VCF convention. alt (str): Alternate allele following VCF convention. \"\"\" chrom : str pos : int ref : str alt : str IncompatibleGenomeAssemblyError Bases: Exception Exception raised for incompatible genome assembly. Source code in src/pheval/utils/phenopacket_utils.py 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 class IncompatibleGenomeAssemblyError ( Exception ): \"\"\"Exception raised for incompatible genome assembly.\"\"\" def __init__ ( self , assembly , phenopacket , message = \"Incompatible Genome Assembly\" ): \"\"\" Initialise IncompatibleGenomeAssemblyError. Attributes: assembly (str): Incompatible genome assembly encountered. phenopacket (Path): Path to the Phenopacket associated with the error. message (str, optional): Custom error message (default is \"Incompatible Genome Assembly\"). \"\"\" self . assembly : str = assembly self . phenopacket : Path = phenopacket self . message : str = message super () . __init__ ( self . message ) def __str__ ( self ): return f \" { self . message } -> { self . assembly } in { self . phenopacket } \" __init__ ( assembly , phenopacket , message = 'Incompatible Genome Assembly' ) Initialise IncompatibleGenomeAssemblyError. Attributes: Name Type Description assembly str Incompatible genome assembly encountered. phenopacket Path Path to the Phenopacket associated with the error. message str Custom error message (default is \"Incompatible Genome Assembly\"). 
Source code in src/pheval/utils/phenopacket_utils.py 30 31 32 33 34 35 36 37 38 39 40 41 42 def __init__ ( self , assembly , phenopacket , message = \"Incompatible Genome Assembly\" ): \"\"\" Initialise IncompatibleGenomeAssemblyError. Attributes: assembly (str): Incompatible genome assembly encountered. phenopacket (Path): Path to the Phenopacket associated with the error. message (str, optional): Custom error message (default is \"Incompatible Genome Assembly\"). \"\"\" self . assembly : str = assembly self . phenopacket : Path = phenopacket self . message : str = message super () . __init__ ( self . message ) PhenopacketRebuilder Class for rebuilding a Phenopacket Source code in src/pheval/utils/phenopacket_utils.py 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 class PhenopacketRebuilder : \"\"\"Class for rebuilding a Phenopacket\"\"\" def __init__ ( self , phenopacket : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Attributes: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket = phenopacket def update_interpretations ( self , interpretations : [ Interpretation ] ) -> Union [ Phenopacket , Family ]: \"\"\" Add the updated interpretations to a Phenopacket or Family. Args: interpretations (List[Interpretation]): The updated interpretations to be added. Returns: Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . interpretations [:] phenopacket . proband . interpretations . extend ( interpretations ) else : del phenopacket . interpretations [:] phenopacket . interpretations . 
extend ( interpretations ) return phenopacket def add_randomised_hpo ( self , randomised_hpo : [ PhenotypicFeature ]) -> Union [ Phenopacket , Family ]: \"\"\" Add randomised phenotypic profiles to a Phenopacket or Family. Args: randomised_hpo: The randomised phenotypic profiles to be added. Returns: Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . phenotypic_features [:] phenopacket . proband . phenotypic_features . extend ( randomised_hpo ) else : del phenopacket . phenotypic_features [:] phenopacket . phenotypic_features . extend ( randomised_hpo ) return phenopacket def add_spiked_vcf_path ( self , spiked_vcf_file_data : File ) -> Union [ Phenopacket , Family ]: \"\"\" Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. \"\"\" phenopacket = copy ( self . phenopacket ) phenopacket_files = [ file for file in phenopacket . files if file . file_attributes [ \"fileFormat\" ] != \"vcf\" ] phenopacket_files . append ( spiked_vcf_file_data ) del phenopacket . files [:] phenopacket . files . extend ( phenopacket_files ) return phenopacket __init__ ( phenopacket ) Initialise PhenopacketUtil Attributes: Name Type Description phenopacket Union [ Phenopacket , Family ] Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 540 541 542 543 544 545 546 def __init__ ( self , phenopacket : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Attributes: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket = phenopacket add_randomised_hpo ( randomised_hpo ) Add randomised phenotypic profiles to a Phenopacket or Family. 
Parameters: Name Type Description Default randomised_hpo [ PhenotypicFeature ] The randomised phenotypic profiles to be added. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. Source code in src/pheval/utils/phenopacket_utils.py 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 def add_randomised_hpo ( self , randomised_hpo : [ PhenotypicFeature ]) -> Union [ Phenopacket , Family ]: \"\"\" Add randomised phenotypic profiles to a Phenopacket or Family. Args: randomised_hpo: The randomised phenotypic profiles to be added. Returns: Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . phenotypic_features [:] phenopacket . proband . phenotypic_features . extend ( randomised_hpo ) else : del phenopacket . phenotypic_features [:] phenopacket . phenotypic_features . extend ( randomised_hpo ) return phenopacket add_spiked_vcf_path ( spiked_vcf_file_data ) Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. Source code in src/pheval/utils/phenopacket_utils.py 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 def add_spiked_vcf_path ( self , spiked_vcf_file_data : File ) -> Union [ Phenopacket , Family ]: \"\"\" Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. \"\"\" phenopacket = copy ( self . phenopacket ) phenopacket_files = [ file for file in phenopacket . files if file . file_attributes [ \"fileFormat\" ] != \"vcf\" ] phenopacket_files . 
append ( spiked_vcf_file_data ) del phenopacket . files [:] phenopacket . files . extend ( phenopacket_files ) return phenopacket update_interpretations ( interpretations ) Add the updated interpretations to a Phenopacket or Family. Parameters: Name Type Description Default interpretations List [ Interpretation ] The updated interpretations to be added. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. Source code in src/pheval/utils/phenopacket_utils.py 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 def update_interpretations ( self , interpretations : [ Interpretation ] ) -> Union [ Phenopacket , Family ]: \"\"\" Add the updated interpretations to a Phenopacket or Family. Args: interpretations (List[Interpretation]): The updated interpretations to be added. Returns: Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . interpretations [:] phenopacket . proband . interpretations . extend ( interpretations ) else : del phenopacket . interpretations [:] phenopacket . interpretations . 
extend ( interpretations ) return phenopacket PhenopacketUtil Class for retrieving data from a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 class PhenopacketUtil : \"\"\"Class for retrieving data from a Phenopacket or Family object\"\"\" def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . 
phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . diseases def _diagnosis_from_interpretations ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the interpretations object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] interpretation = self . interpretations () for i in interpretation : ( diagnoses . 
append ( ProbandDisease ( disease_name = i . diagnosis . disease . label , disease_identifier = i . diagnosis . disease . id , ) ) if i . diagnosis . disease . label != \"\" and i . diagnosis . disease . id != \"\" else None ) return diagnoses def _diagnosis_from_disease ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the diseases object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] for disease in self . diseases (): diagnoses . append ( ProbandDisease ( disease_name = disease . term . label , disease_identifier = disease . term . id ) ) return diagnoses def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ())) def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . interpretations def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . 
chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . 
name )) return vcf_data @staticmethod def _extract_diagnosed_gene ( genomic_interpretation : GenomicInterpretation , ) -> ProbandCausativeGene : \"\"\" Retrieve the disease causing genes from the variant descriptor field if not empty, otherwise, retrieves from the gene descriptor from a phenopacket. Args: genomic_interpretation (GenomicInterpretation): A genomic interpretation from a Phenopacket Returns: ProbandCausativeGene: The disease causing gene \"\"\" if genomic_interpretation . variant_interpretation . ByteSize () != 0 : return ProbandCausativeGene ( genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . symbol , genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . value_id , ) else : return ProbandCausativeGene ( gene_symbol = genomic_interpretation . gene . symbol , gene_identifier = genomic_interpretation . gene . value_id , ) def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . 
variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . 
diagnoses ()) == 0 : return True return False __init__ ( phenopacket_contents ) Initialise PhenopacketUtil Parameters: Name Type Description Default phenopacket_contents Union [ Phenopacket , Family ] Phenopacket or Family object required Source code in src/pheval/utils/phenopacket_utils.py 222 223 224 225 226 227 228 def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents causative_variants () Retrieve a list of causative variants listed in a Phenopacket Returns: Type Description List [ ProbandCausativeVariant ] List[ProbandCausativeVariant]: List of proband causative variants Source code in src/pheval/utils/phenopacket_utils.py 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants check_incomplete_disease_record () Check if any disease record in the phenopacket has incomplete information. 
This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: Name Type Description bool bool True if any disease record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 522 523 524 525 526 527 528 529 530 531 532 533 534 def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False check_incomplete_gene_record () Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: Name Type Description bool bool True if any gene record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False check_incomplete_variant_record () Check if any variant record in the phenopacket has incomplete information. 
This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: Name Type Description bool bool True if any variant record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False diagnosed_genes () Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes Source code in src/pheval/utils/phenopacket_utils.py 446 447 448 449 450 451 452 453 454 455 456 457 458 def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes diagnosed_variants () Retrieve a list of all known causative variants from a phenopacket. 
Returns: List[GenomicVariant]: List of causative variants Source code in src/pheval/utils/phenopacket_utils.py 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants diagnoses () Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: Type Description List [ ProbandDisease ] List[ProbandDisease]: List of diagnosed diseases Source code in src/pheval/utils/phenopacket_utils.py 331 332 333 334 335 336 337 338 def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ())) diseases () Retrieve a list of Diseases associated with the proband Returns: Type Description List [ Disease ] List[Disease]: List of diseases Source code in src/pheval/utils/phenopacket_utils.py 283 284 285 286 287 288 289 290 291 292 293 def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . 
phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . diseases files () Retrieve a list of files associated with a phenopacket Returns: Type Description List [ File ] List[File]: List of files associated with a phenopacket Source code in src/pheval/utils/phenopacket_utils.py 380 381 382 383 384 385 386 387 def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files interpretations () Retrieve a list of interpretations from a Phenopacket Returns: Type Description List [ Interpretation ] List[Interpretation]: List of interpretations Source code in src/pheval/utils/phenopacket_utils.py 340 341 342 343 344 345 346 347 348 349 350 def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . interpretations negated_phenotypic_features () Retrieve a list of all negated HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of negated HPO terms Source code in src/pheval/utils/phenopacket_utils.py 269 270 271 272 273 274 275 276 277 278 279 280 281 def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . 
append ( p ) return negated_phenotypic_features observed_phenotypic_features () Retrieve a list of all observed HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of observed HPO terms Source code in src/pheval/utils/phenopacket_utils.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features phenotypic_features () Retrieve a list of all HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of HPO terms Source code in src/pheval/utils/phenopacket_utils.py 242 243 244 245 246 247 248 249 250 251 252 def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features sample_id () Retrieve the sample ID from a Phenopacket or proband of a Family Returns: Name Type Description str str Sample ID Source code in src/pheval/utils/phenopacket_utils.py 230 231 232 233 234 235 236 237 238 239 240 def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id vcf_file_data ( phenopacket_path , vcf_dir ) Retrieve the genome assembly and VCF file name from a phenopacket. 
Parameters: Name Type Description Default phenopacket_path Path The path to the phenopacket file. required vcf_dir Path The directory path where the VCF file is stored. required Returns: Name Type Description File File The VCF file with updated URI pointing to the specified directory. Raises: Type Description IncorrectFileFormatError If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError If the genome assembly of the VCF file is not compatible. Note This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. Source code in src/pheval/utils/phenopacket_utils.py 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . 
endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . name )) return vcf_data ProbandCausativeGene dataclass Represents a causative gene associated with a proband Parameters: Name Type Description Default gene_symbol str Symbol representing the gene required gene_identifier str The ENSEMBL gene identifier for the result entry required Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. Source code in src/pheval/utils/phenopacket_utils.py 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 @dataclass class ProbandCausativeGene : \"\"\" Represents a causative gene associated with a proband Args: gene_symbol (str): Symbol representing the gene gene_identifier (str): The ENSEMBL gene identifier for the result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. 
\"\"\" gene_symbol : str gene_identifier : str ProbandCausativeVariant dataclass Represents a causative variant associated with a proband Parameters: Name Type Description Default proband_id str ID of the proband required assembly str Genome assembly required variant GenomicVariant Genomic variant associated with the proband required genotype str Genotype information for the variant required info str Additional information about the variant (default is an empty string) '' Source code in src/pheval/utils/phenopacket_utils.py 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 @dataclass class ProbandCausativeVariant : \"\"\" Represents a causative variant associated with a proband Args: proband_id (str): ID of the proband assembly (str): Genome assembly variant (GenomicVariant): Genomic variant associated with the proband genotype (str): Genotype information for the variant info (str, optional): Additional information about the variant (default is an empty string) \"\"\" proband_id : str assembly : str variant : GenomicVariant genotype : str info : str = \"\" ProbandDisease dataclass Represents a disease associated with a proband Parameters: Name Type Description Default disease_name str Name of the disease required disease_identifier str Identifier for the disease result entry in the OMIM namespace required Notes While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/utils/phenopacket_utils.py 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 @dataclass ( frozen = True , eq = True ) class ProbandDisease : \"\"\" Represents a disease associated with a proband Args: disease_name (str): Name of the disease disease_identifier (str): Identifier for the disease result entry in the OMIM namespace Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str create_gene_identifier_map () Create a mapping of gene identifiers to gene symbols using HGNC data. Returns: Name Type Description dict dict A mapping of gene identifiers to gene symbols. Notes The dictionary structure: { 'identifier': 'gene_symbol', ... } Source code in src/pheval/utils/phenopacket_utils.py 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 def create_gene_identifier_map () -> dict : \"\"\" Create a mapping of gene identifiers to gene symbols using HGNC data. Returns: dict: A mapping of gene identifiers to gene symbols. Notes: The dictionary structure: { 'identifier': 'gene_symbol', ... } \"\"\" hgnc_df = read_hgnc_data () identifier_map = {} for _index , row in hgnc_df . iterrows (): identifier_map [ row [ \"ensembl_gene_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"hgnc_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"entrez_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"refseq_accession\" ]] = row [ \"symbol\" ] return identifier_map create_hgnc_dict () Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data. Returns: Name Type Description defaultdict defaultdict A dictionary containing gene symbols as keys and their associated gene information. 
Notes The dictionary structure: { 'gene_symbol': { 'ensembl_id': str, 'hgnc_id': str, 'entrez_id': str, 'refseq_accession': str, 'previous_symbol': [str, ...] }, ... } Source code in src/pheval/utils/phenopacket_utils.py 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 def create_hgnc_dict () -> defaultdict : \"\"\" Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data. Returns: defaultdict: A dictionary containing gene symbols as keys and their associated gene information. Notes: The dictionary structure: { 'gene_symbol': { 'ensembl_id': str, 'hgnc_id': str, 'entrez_id': str, 'refseq_accession': str, 'previous_symbol': [str, ...] }, ... } \"\"\" hgnc_df = read_hgnc_data () hgnc_data = defaultdict ( dict ) for _index , row in hgnc_df . iterrows (): previous_names = [] hgnc_data [ row [ \"symbol\" ]][ \"ensembl_id\" ] = row [ \"ensembl_gene_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"hgnc_id\" ] = row [ \"hgnc_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"entrez_id\" ] = row [ \"entrez_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"refseq_accession\" ] = row [ \"refseq_accession\" ] previous = str ( row [ \"prev_symbol\" ]) . split ( \"|\" ) for p in previous : previous_names . append ( p . strip ( '\"' )) hgnc_data [ row [ \"symbol\" ]][ \"previous_symbol\" ] = previous_names return hgnc_data create_json_message ( phenopacket ) Create a JSON message for writing to a file. Args: - phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON. Returns: - str: A JSON-formatted string representation of the Phenopacket or Family object. Source code in src/pheval/utils/phenopacket_utils.py 608 609 610 611 612 613 614 615 616 617 618 def create_json_message ( phenopacket : Union [ Phenopacket , Family ]) -> str : \"\"\" Create a JSON message for writing to a file. 
Args: - phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON. Returns: - str: A JSON-formatted string representation of the Phenopacket or Family object. \"\"\" return MessageToJson ( phenopacket ) phenopacket_reader ( file ) Read a Phenopacket file and returns its contents as a Phenopacket or Family object Parameters: Name Type Description Default file Path Path to the Phenopacket file required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 def phenopacket_reader ( file : Path ) -> Union [ Phenopacket , Family ]: \"\"\" Read a Phenopacket file and returns its contents as a Phenopacket or Family object Args: file (Path): Path to the Phenopacket file Returns: Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object \"\"\" file = open ( file , \"r\" ) phenopacket = json . load ( file ) file . close () if \"proband\" in phenopacket : return Parse ( json . dumps ( phenopacket ), Family ()) else : return Parse ( json . dumps ( phenopacket ), Phenopacket ()) read_hgnc_data () Read HGNC data from a file and return it as a Pandas DataFrame. Returns: Type Description DataFrame pd.DataFrame: DataFrame containing the HGNC data. Source code in src/pheval/utils/phenopacket_utils.py 125 126 127 128 129 130 131 132 133 134 135 136 def read_hgnc_data () -> pd . DataFrame : \"\"\" Read HGNC data from a file and return it as a Pandas DataFrame. Returns: pd.DataFrame: DataFrame containing the HGNC data. \"\"\" return pd . read_csv ( os . path . dirname ( __file__ ) . replace ( \"utils\" , \"resources/hgnc_complete_set.txt\" ), delimiter = \" \\t \" , dtype = str , ) write_phenopacket ( phenopacket , output_file ) Write a Phenopacket or Family object to a file in JSON format. 
Parameters: Name Type Description Default phenopacket Phenopacket or Family The Phenopacket or Family object to be written. required output_file Path The Path object representing the file to write the Phenopacket data. required Returns: Type Description None None Source code in src/pheval/utils/phenopacket_utils.py 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 def write_phenopacket ( phenopacket : Union [ Phenopacket , Family ], output_file : Path ) -> None : \"\"\" Write a Phenopacket or Family object to a file in JSON format. Args: phenopacket (Phenopacket or Family): The Phenopacket or Family object to be written. output_file (Path): The Path object representing the file to write the Phenopacket data. Returns: None \"\"\" phenopacket_json = create_json_message ( phenopacket ) with open ( output_file , \"w\" ) as outfile : outfile . write ( phenopacket_json ) outfile . close ()","title":"Phenopacket utils"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater","text":"Class for updating gene identifiers within genomic interpretations. Source code in src/pheval/utils/phenopacket_utils.py 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 class GeneIdentifierUpdater : \"\"\"Class for updating gene identifiers within genomic interpretations.\"\"\" def __init__ ( self , gene_identifier : str , hgnc_data : dict = None , identifier_map : dict = None ): \"\"\" Initialise the GeneIdentifierUpdater. Args: gene_identifier (str): The gene identifier to update to. hgnc_data (dict): A dictionary containing HGNC data (default: None). 
identifier_map (dict): A dictionary mapping gene identifiers (default: None). \"\"\" self . hgnc_data = hgnc_data self . gene_identifier = gene_identifier self . identifier_map = identifier_map def find_identifier ( self , gene_symbol : str ) -> str : \"\"\" Find the specified gene identifier for a gene symbol. Args: gene_symbol (str): The gene symbol to find the identifier for. Returns: str: The identified gene identifier. \"\"\" if gene_symbol in self . hgnc_data . keys (): return self . hgnc_data [ gene_symbol ][ self . gene_identifier ] else : for _symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return data [ self . gene_identifier ] def obtain_gene_symbol_from_identifier ( self , query_gene_identifier : str ) -> str : \"\"\" Obtain gene symbol from a gene identifier. Args: query_gene_identifier (str): The gene identifier. Returns: str: The gene symbol corresponding to the identifier. \"\"\" return self . identifier_map [ query_gene_identifier ] def _find_alternate_ids ( self , gene_symbol : str ) -> List [ str ]: \"\"\" Find the alternate IDs for a gene symbol. Args: gene_symbol (str): The gene symbol to find alternate IDs for. Returns: List[str]: List of alternate IDs for the gene symbol. \"\"\" if gene_symbol in self . hgnc_data . keys (): return [ self . hgnc_data [ gene_symbol ][ \"hgnc_id\" ], \"ncbigene:\" + self . hgnc_data [ gene_symbol ][ \"entrez_id\" ], \"ensembl:\" + self . hgnc_data [ gene_symbol ][ \"ensembl_id\" ], \"symbol:\" + gene_symbol , ] else : for symbol , data in self . hgnc_data . 
items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return [ data [ \"hgnc_id\" ], \"ncbigene:\" + data [ \"entrez_id\" ], \"ensembl:\" + data [ \"ensembl_id\" ], \"symbol:\" + symbol , ] def update_genomic_interpretations_gene_identifier ( self , interpretations : List [ Interpretation ], phenopacket_path : Path ) -> List [ Interpretation ]: \"\"\" Update the genomic interpretations of a Phenopacket. Args: interpretations (List[Interpretation]): List of Interpretation objects. Returns: List[Interpretation]: Updated list of Interpretation objects. \"\"\" updated_interpretations = copy ( list ( interpretations )) for updated_interpretation in updated_interpretations : for g in updated_interpretation . diagnosis . genomic_interpretations : updated_gene_identifier = self . find_identifier ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) info_log . info ( f \"Updating gene identifier in { phenopacket_path } from \" f \" { g . variant_interpretation . variation_descriptor . gene_context . value_id } \" f \"to { updated_gene_identifier } \" ) g . variant_interpretation . variation_descriptor . gene_context . value_id = ( updated_gene_identifier ) del g . variant_interpretation . variation_descriptor . gene_context . alternate_ids [:] g . variant_interpretation . variation_descriptor . gene_context . alternate_ids . extend ( self . _find_alternate_ids ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) ) return updated_interpretations","title":"GeneIdentifierUpdater"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.__init__","text":"Initialise the GeneIdentifierUpdater. Parameters: Name Type Description Default gene_identifier str The gene identifier to update to. required hgnc_data dict A dictionary containing HGNC data (default: None). None identifier_map dict A dictionary mapping gene identifiers (default: None). 
None Source code in src/pheval/utils/phenopacket_utils.py 641 642 643 644 645 646 647 648 649 650 651 652 653 def __init__ ( self , gene_identifier : str , hgnc_data : dict = None , identifier_map : dict = None ): \"\"\" Initialise the GeneIdentifierUpdater. Args: gene_identifier (str): The gene identifier to update to. hgnc_data (dict): A dictionary containing HGNC data (default: None). identifier_map (dict): A dictionary mapping gene identifiers (default: None). \"\"\" self . hgnc_data = hgnc_data self . gene_identifier = gene_identifier self . identifier_map = identifier_map","title":"__init__"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.find_identifier","text":"Find the specified gene identifier for a gene symbol. Parameters: Name Type Description Default gene_symbol str The gene symbol to find the identifier for. required Returns: Name Type Description str str The identified gene identifier. Source code in src/pheval/utils/phenopacket_utils.py 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 def find_identifier ( self , gene_symbol : str ) -> str : \"\"\" Find the specified gene identifier for a gene symbol. Args: gene_symbol (str): The gene symbol to find the identifier for. Returns: str: The identified gene identifier. \"\"\" if gene_symbol in self . hgnc_data . keys (): return self . hgnc_data [ gene_symbol ][ self . gene_identifier ] else : for _symbol , data in self . hgnc_data . items (): for prev_symbol in data [ \"previous_symbol\" ]: if prev_symbol == gene_symbol : return data [ self . gene_identifier ]","title":"find_identifier"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.obtain_gene_symbol_from_identifier","text":"Obtain gene symbol from a gene identifier. Parameters: Name Type Description Default query_gene_identifier str The gene identifier. 
required Returns: Name Type Description str str The gene symbol corresponding to the identifier. Source code in src/pheval/utils/phenopacket_utils.py 673 674 675 676 677 678 679 680 681 682 683 def obtain_gene_symbol_from_identifier ( self , query_gene_identifier : str ) -> str : \"\"\" Obtain gene symbol from a gene identifier. Args: query_gene_identifier (str): The gene identifier. Returns: str: The gene symbol corresponding to the identifier. \"\"\" return self . identifier_map [ query_gene_identifier ]","title":"obtain_gene_symbol_from_identifier"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GeneIdentifierUpdater.update_genomic_interpretations_gene_identifier","text":"Update the genomic interpretations of a Phenopacket. Parameters: Name Type Description Default interpretations List [ Interpretation ] List of Interpretation objects. required Returns: Type Description List [ Interpretation ] List[Interpretation]: Updated list of Interpretation objects. Source code in src/pheval/utils/phenopacket_utils.py 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 def update_genomic_interpretations_gene_identifier ( self , interpretations : List [ Interpretation ], phenopacket_path : Path ) -> List [ Interpretation ]: \"\"\" Update the genomic interpretations of a Phenopacket. Args: interpretations (List[Interpretation]): List of Interpretation objects. Returns: List[Interpretation]: Updated list of Interpretation objects. \"\"\" updated_interpretations = copy ( list ( interpretations )) for updated_interpretation in updated_interpretations : for g in updated_interpretation . diagnosis . genomic_interpretations : updated_gene_identifier = self . find_identifier ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) info_log . info ( f \"Updating gene identifier in { phenopacket_path } from \" f \" { g . variant_interpretation . 
variation_descriptor . gene_context . value_id } \" f \"to { updated_gene_identifier } \" ) g . variant_interpretation . variation_descriptor . gene_context . value_id = ( updated_gene_identifier ) del g . variant_interpretation . variation_descriptor . gene_context . alternate_ids [:] g . variant_interpretation . variation_descriptor . gene_context . alternate_ids . extend ( self . _find_alternate_ids ( g . variant_interpretation . variation_descriptor . gene_context . symbol ) ) return updated_interpretations","title":"update_genomic_interpretations_gene_identifier"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.GenomicVariant","text":"Represents a genomic variant. Parameters: Name Type Description Default chrom str The chromosome position of the variant recommended to be provided in the following format. required pos int Position of the variant following VCF convention. required ref str Reference allele following VCF convention. required alt str Alternate allele following VCF convention. required Source code in src/pheval/utils/phenopacket_utils.py 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 @dataclass class GenomicVariant : \"\"\" Represents a genomic variant. Args: chrom (str): The chromosome position of the variant recommended to be provided in the following format. This includes numerical designations from 1 to 22 representing autosomal chromosomes, as well as the sex chromosomes X and Y, and the mitochondrial chromosome MT. pos (int): Position of the variant following VCF convention. ref (str): Reference allele following VCF convention. alt (str): Alternate allele following VCF convention. \"\"\" chrom : str pos : int ref : str alt : str","title":"GenomicVariant"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError","text":"Bases: Exception Exception raised for incompatible genome assembly. 
Source code in src/pheval/utils/phenopacket_utils.py 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 class IncompatibleGenomeAssemblyError ( Exception ): \"\"\"Exception raised for incompatible genome assembly.\"\"\" def __init__ ( self , assembly , phenopacket , message = \"Incompatible Genome Assembly\" ): \"\"\" Initialise IncompatibleGenomeAssemblyError. Attributes: assembly (str): Incompatible genome assembly encountered. phenopacket (Path): Path to the Phenopacket associated with the error. message (str, optional): Custom error message (default is \"Incompatible Genome Assembly\"). \"\"\" self . assembly : str = assembly self . phenopacket : Path = phenopacket self . message : str = message super () . __init__ ( self . message ) def __str__ ( self ): return f \" { self . message } -> { self . assembly } in { self . phenopacket } \"","title":"IncompatibleGenomeAssemblyError"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.IncompatibleGenomeAssemblyError.__init__","text":"Initialise IncompatibleGenomeAssemblyError. Attributes: Name Type Description assembly str Incompatible genome assembly encountered. phenopacket Path Path to the Phenopacket associated with the error. message str Custom error message (default is \"Incompatible Genome Assembly\"). Source code in src/pheval/utils/phenopacket_utils.py 30 31 32 33 34 35 36 37 38 39 40 41 42 def __init__ ( self , assembly , phenopacket , message = \"Incompatible Genome Assembly\" ): \"\"\" Initialise IncompatibleGenomeAssemblyError. Attributes: assembly (str): Incompatible genome assembly encountered. phenopacket (Path): Path to the Phenopacket associated with the error. message (str, optional): Custom error message (default is \"Incompatible Genome Assembly\"). \"\"\" self . assembly : str = assembly self . phenopacket : Path = phenopacket self . message : str = message super () . __init__ ( self . 
message )","title":"__init__"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder","text":"Class for rebuilding a Phenopacket Source code in src/pheval/utils/phenopacket_utils.py 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 class PhenopacketRebuilder : \"\"\"Class for rebuilding a Phenopacket\"\"\" def __init__ ( self , phenopacket : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Attributes: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket = phenopacket def update_interpretations ( self , interpretations : [ Interpretation ] ) -> Union [ Phenopacket , Family ]: \"\"\" Add the updated interpretations to a Phenopacket or Family. Args: interpretations (List[Interpretation]): The updated interpretations to be added. Returns: Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . interpretations [:] phenopacket . proband . interpretations . extend ( interpretations ) else : del phenopacket . interpretations [:] phenopacket . interpretations . extend ( interpretations ) return phenopacket def add_randomised_hpo ( self , randomised_hpo : [ PhenotypicFeature ]) -> Union [ Phenopacket , Family ]: \"\"\" Add randomised phenotypic profiles to a Phenopacket or Family. Args: randomised_hpo: The randomised phenotypic profiles to be added. Returns: Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . 
phenotypic_features [:] phenopacket . proband . phenotypic_features . extend ( randomised_hpo ) else : del phenopacket . phenotypic_features [:] phenopacket . phenotypic_features . extend ( randomised_hpo ) return phenopacket def add_spiked_vcf_path ( self , spiked_vcf_file_data : File ) -> Union [ Phenopacket , Family ]: \"\"\" Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. \"\"\" phenopacket = copy ( self . phenopacket ) phenopacket_files = [ file for file in phenopacket . files if file . file_attributes [ \"fileFormat\" ] != \"vcf\" ] phenopacket_files . append ( spiked_vcf_file_data ) del phenopacket . files [:] phenopacket . files . extend ( phenopacket_files ) return phenopacket","title":"PhenopacketRebuilder"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.__init__","text":"Initialise PhenopacketUtil Attributes: Name Type Description phenopacket Union [ Phenopacket , Family ] Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 540 541 542 543 544 545 546 def __init__ ( self , phenopacket : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Attributes: phenopacket (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket = phenopacket","title":"__init__"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_randomised_hpo","text":"Add randomised phenotypic profiles to a Phenopacket or Family. Parameters: Name Type Description Default randomised_hpo [ PhenotypicFeature ] The randomised phenotypic profiles to be added. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. 
Source code in src/pheval/utils/phenopacket_utils.py 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 def add_randomised_hpo ( self , randomised_hpo : [ PhenotypicFeature ]) -> Union [ Phenopacket , Family ]: \"\"\" Add randomised phenotypic profiles to a Phenopacket or Family. Args: randomised_hpo: The randomised phenotypic profiles to be added. Returns: Union[Phenopacket, Family] The Phenopacket or Family object with added randomised profiles. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . phenotypic_features [:] phenopacket . proband . phenotypic_features . extend ( randomised_hpo ) else : del phenopacket . phenotypic_features [:] phenopacket . phenotypic_features . extend ( randomised_hpo ) return phenopacket","title":"add_randomised_hpo"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.add_spiked_vcf_path","text":"Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. Source code in src/pheval/utils/phenopacket_utils.py 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 def add_spiked_vcf_path ( self , spiked_vcf_file_data : File ) -> Union [ Phenopacket , Family ]: \"\"\" Add a spiked VCF path to a Phenopacket or Family. Args: - spiked_vcf_file_data (File): The VCF file data to be added. Returns: - Phenopacket or Family: The Phenopacket or Family object with the added spiked VCF path. \"\"\" phenopacket = copy ( self . phenopacket ) phenopacket_files = [ file for file in phenopacket . files if file . file_attributes [ \"fileFormat\" ] != \"vcf\" ] phenopacket_files . append ( spiked_vcf_file_data ) del phenopacket . files [:] phenopacket . files . 
extend ( phenopacket_files ) return phenopacket","title":"add_spiked_vcf_path"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketRebuilder.update_interpretations","text":"Add the updated interpretations to a Phenopacket or Family. Parameters: Name Type Description Default interpretations List [ Interpretation ] The updated interpretations to be added. required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. Source code in src/pheval/utils/phenopacket_utils.py 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 def update_interpretations ( self , interpretations : [ Interpretation ] ) -> Union [ Phenopacket , Family ]: \"\"\" Add the updated interpretations to a Phenopacket or Family. Args: interpretations (List[Interpretation]): The updated interpretations to be added. Returns: Union[Phenopacket, Family]: The Phenopacket or Family object with updated interpretations. \"\"\" phenopacket = copy ( self . phenopacket ) if hasattr ( phenopacket , \"proband\" ): del phenopacket . proband . interpretations [:] phenopacket . proband . interpretations . extend ( interpretations ) else : del phenopacket . interpretations [:] phenopacket . interpretations . 
extend ( interpretations ) return phenopacket","title":"update_interpretations"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil","text":"Class for retrieving data from a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 class PhenopacketUtil : \"\"\"Class for retrieving data from a Phenopacket or Family object\"\"\" def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . 
phenopacket_contents = phenopacket_contents def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . append ( p ) return phenotypic_features def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . 
diseases def _diagnosis_from_interpretations ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the interpretations object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] interpretation = self . interpretations () for i in interpretation : ( diagnoses . append ( ProbandDisease ( disease_name = i . diagnosis . disease . label , disease_identifier = i . diagnosis . disease . id , ) ) if i . diagnosis . disease . label != \"\" and i . diagnosis . disease . id != \"\" else None ) return diagnoses def _diagnosis_from_disease ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a list of disease diagnoses associated with the proband from the diseases object Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" diagnoses = [] for disease in self . diseases (): diagnoses . append ( ProbandDisease ( disease_name = disease . term . label , disease_identifier = disease . term . id ) ) return diagnoses def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ())) def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . interpretations def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . 
interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . 
uri ), \".vcf or .vcf.gz file\" ) if vcf_data . file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . name )) return vcf_data @staticmethod def _extract_diagnosed_gene ( genomic_interpretation : GenomicInterpretation , ) -> ProbandCausativeGene : \"\"\" Retrieve the disease causing genes from the variant descriptor field if not empty, otherwise, retrieves from the gene descriptor from a phenopacket. Args: genomic_interpretation (GenomicInterpretation): A genomic interpretation from a Phenopacket Returns: ProbandCausativeGene: The disease causing gene \"\"\" if genomic_interpretation . variant_interpretation . ByteSize () != 0 : return ProbandCausativeGene ( genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . symbol , genomic_interpretation . variant_interpretation . variation_descriptor . gene_context . value_id , ) else : return ProbandCausativeGene ( gene_symbol = genomic_interpretation . gene . symbol , gene_identifier = genomic_interpretation . gene . value_id , ) def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . 
interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . append ( variant ) return variants def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. 
Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False","title":"PhenopacketUtil"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.__init__","text":"Initialise PhenopacketUtil Parameters: Name Type Description Default phenopacket_contents Union [ Phenopacket , Family ] Phenopacket or Family object required Source code in src/pheval/utils/phenopacket_utils.py 222 223 224 225 226 227 228 def __init__ ( self , phenopacket_contents : Union [ Phenopacket , Family ]): \"\"\"Initialise PhenopacketUtil Args: phenopacket_contents (Union[Phenopacket, Family]): Phenopacket or Family object \"\"\" self . phenopacket_contents = phenopacket_contents","title":"__init__"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.causative_variants","text":"Retrieve a list of causative variants listed in a Phenopacket Returns: Type Description List [ ProbandCausativeVariant ] List[ProbandCausativeVariant]: List of proband causative variants Source code in src/pheval/utils/phenopacket_utils.py 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 def causative_variants ( self ) -> List [ ProbandCausativeVariant ]: \"\"\" Retrieve a list of causative variants listed in a Phenopacket Returns: List[ProbandCausativeVariant]: List of proband causative variants \"\"\" all_variants = [] interpretation = self . interpretations () for i in interpretation : for g in i . diagnosis . genomic_interpretations : vcf_record = g . variant_interpretation . variation_descriptor . vcf_record genotype = g . variant_interpretation . variation_descriptor . allelic_state variant_data = ProbandCausativeVariant ( self . phenopacket_contents . subject . id , vcf_record . genome_assembly , GenomicVariant ( vcf_record . chrom , vcf_record . pos , vcf_record . ref , vcf_record . 
alt , ), genotype . label , vcf_record . info , ) all_variants . append ( variant_data ) return all_variants","title":"causative_variants"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_disease_record","text":"Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: Name Type Description bool bool True if any disease record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 522 523 524 525 526 527 528 529 530 531 532 533 534 def check_incomplete_disease_record ( self ) -> bool : \"\"\" Check if any disease record in the phenopacket has incomplete information. This method iterates through the diagnosed disease records and checks if any of them have missing or incomplete information such as empty disease name, or disease identifier. Returns: bool: True if any disease record is incomplete, False otherwise. \"\"\" if len ( self . diagnoses ()) == 0 : return True return False","title":"check_incomplete_disease_record"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_gene_record","text":"Check if any gene record in the phenopacket has incomplete information. This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: Name Type Description bool bool True if any gene record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 def check_incomplete_gene_record ( self ) -> bool : \"\"\" Check if any gene record in the phenopacket has incomplete information. 
This method iterates through the diagnosed gene records and checks if any of them have missing or incomplete information such as gene name, or gene identifier. Returns: bool: True if any gene record is incomplete, False otherwise. \"\"\" genes = self . diagnosed_genes () for gene in genes : if gene . gene_symbol == \"\" or gene . gene_identifier == \"\" : return True return False","title":"check_incomplete_gene_record"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.check_incomplete_variant_record","text":"Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: Name Type Description bool bool True if any variant record is incomplete, False otherwise. Source code in src/pheval/utils/phenopacket_utils.py 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 def check_incomplete_variant_record ( self ) -> bool : \"\"\" Check if any variant record in the phenopacket has incomplete information. This method iterates through the diagnosed variant records and checks if any of them have missing or incomplete information such as empty chromosome, position, reference, or alternate allele. Returns: bool: True if any variant record is incomplete, False otherwise. \"\"\" variants = self . diagnosed_variants () for variant in variants : if ( variant . chrom == \"\" or variant . pos == 0 or variant . pos == \"\" or variant . ref == \"\" or variant . alt == \"\" ): return True return False","title":"check_incomplete_variant_record"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_genes","text":"Retrieve the disease causing genes from a phenopacket. 
Returns: List[ProbandCausativeGene]: List of causative genes Source code in src/pheval/utils/phenopacket_utils.py 446 447 448 449 450 451 452 453 454 455 456 457 458 def diagnosed_genes ( self ) -> List [ ProbandCausativeGene ]: \"\"\" Retrieve the disease causing genes from a phenopacket. Returns: List[ProbandCausativeGene]: List of causative genes \"\"\" pheno_interpretation = self . interpretations () genes = [] for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : genes . append ( self . _extract_diagnosed_gene ( g )) genes = list ({ gene . gene_symbol : gene for gene in genes } . values ()) return genes","title":"diagnosed_genes"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnosed_variants","text":"Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants Source code in src/pheval/utils/phenopacket_utils.py 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 def diagnosed_variants ( self ) -> List [ GenomicVariant ]: \"\"\" Retrieve a list of all known causative variants from a phenopacket. Returns: List[GenomicVariant]: List of causative variants \"\"\" variants = [] pheno_interpretation = self . interpretations () for i in pheno_interpretation : for g in i . diagnosis . genomic_interpretations : variant = GenomicVariant ( chrom = str ( g . variant_interpretation . variation_descriptor . vcf_record . chrom . replace ( \"chr\" , \"\" ) ), pos = int ( g . variant_interpretation . variation_descriptor . vcf_record . pos ), ref = g . variant_interpretation . variation_descriptor . vcf_record . ref , alt = g . variant_interpretation . variation_descriptor . vcf_record . alt , ) variants . 
append ( variant ) return variants","title":"diagnosed_variants"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diagnoses","text":"Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: Type Description List [ ProbandDisease ] List[ProbandDisease]: List of diagnosed diseases Source code in src/pheval/utils/phenopacket_utils.py 331 332 333 334 335 336 337 338 def diagnoses ( self ) -> List [ ProbandDisease ]: \"\"\" Retrieve a unique list of disease diagnoses associated with the proband from a Phenopacket Returns: List[ProbandDisease]: List of diagnosed diseases \"\"\" return list ( set ( self . _diagnosis_from_interpretations () + self . _diagnosis_from_disease ()))","title":"diagnoses"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.diseases","text":"Retrieve a list of Diseases associated with the proband Returns: Type Description List [ Disease ] List[Disease]: List of diseases Source code in src/pheval/utils/phenopacket_utils.py 283 284 285 286 287 288 289 290 291 292 293 def diseases ( self ) -> List [ Disease ]: \"\"\" Retrieve a list of Diseases associated with the proband Returns: List[Disease]: List of diseases \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . diseases else : return self . phenopacket_contents . 
diseases","title":"diseases"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.files","text":"Retrieve a list of files associated with a phenopacket Returns: Type Description List [ File ] List[File]: List of files associated with a phenopacket Source code in src/pheval/utils/phenopacket_utils.py 380 381 382 383 384 385 386 387 def files ( self ) -> List [ File ]: \"\"\" Retrieve a list of files associated with a phenopacket Returns: List[File]: List of files associated with a phenopacket \"\"\" return self . phenopacket_contents . files","title":"files"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.interpretations","text":"Retrieve a list of interpretations from a Phenopacket Returns: Type Description List [ Interpretation ] List[Interpretation]: List of interpretations Source code in src/pheval/utils/phenopacket_utils.py 340 341 342 343 344 345 346 347 348 349 350 def interpretations ( self ) -> List [ Interpretation ]: \"\"\" Retrieve a list of interpretations from a Phenopacket Returns: List[Interpretation]: List of interpretations \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . interpretations else : return self . phenopacket_contents . 
interpretations","title":"interpretations"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.negated_phenotypic_features","text":"Retrieve a list of all negated HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of negated HPO terms Source code in src/pheval/utils/phenopacket_utils.py 269 270 271 272 273 274 275 276 277 278 279 280 281 def negated_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all negated HPO terms Returns: List[PhenotypicFeature]: List of negated HPO terms \"\"\" negated_phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : negated_phenotypic_features . append ( p ) return negated_phenotypic_features","title":"negated_phenotypic_features"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.observed_phenotypic_features","text":"Retrieve a list of all observed HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of observed HPO terms Source code in src/pheval/utils/phenopacket_utils.py 254 255 256 257 258 259 260 261 262 263 264 265 266 267 def observed_phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all observed HPO terms Returns: List[PhenotypicFeature]: List of observed HPO terms \"\"\" phenotypic_features = [] all_phenotypic_features = self . phenotypic_features () for p in all_phenotypic_features : if p . excluded : continue phenotypic_features . 
append ( p ) return phenotypic_features","title":"observed_phenotypic_features"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.phenotypic_features","text":"Retrieve a list of all HPO terms Returns: Type Description List [ PhenotypicFeature ] List[PhenotypicFeature]: List of HPO terms Source code in src/pheval/utils/phenopacket_utils.py 242 243 244 245 246 247 248 249 250 251 252 def phenotypic_features ( self ) -> List [ PhenotypicFeature ]: \"\"\" Retrieve a list of all HPO terms Returns: List[PhenotypicFeature]: List of HPO terms \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . phenotypic_features else : return self . phenopacket_contents . phenotypic_features","title":"phenotypic_features"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.sample_id","text":"Retrieve the sample ID from a Phenopacket or proband of a Family Returns: Name Type Description str str Sample ID Source code in src/pheval/utils/phenopacket_utils.py 230 231 232 233 234 235 236 237 238 239 240 def sample_id ( self ) -> str : \"\"\" Retrieve the sample ID from a Phenopacket or proband of a Family Returns: str: Sample ID \"\"\" if hasattr ( self . phenopacket_contents , \"proband\" ): return self . phenopacket_contents . proband . subject . id else : return self . phenopacket_contents . subject . id","title":"sample_id"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.PhenopacketUtil.vcf_file_data","text":"Retrieve the genome assembly and VCF file name from a phenopacket. Parameters: Name Type Description Default phenopacket_path Path The path to the phenopacket file. required vcf_dir Path The directory path where the VCF file is stored. required Returns: Name Type Description File File The VCF file with updated URI pointing to the specified directory. 
Raises: Type Description IncorrectFileFormatError If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError If the genome assembly of the VCF file is not compatible. Note This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. Source code in src/pheval/utils/phenopacket_utils.py 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 def vcf_file_data ( self , phenopacket_path : Path , vcf_dir : Path ) -> File : \"\"\" Retrieve the genome assembly and VCF file name from a phenopacket. Args: phenopacket_path (Path): The path to the phenopacket file. vcf_dir (Path): The directory path where the VCF file is stored. Returns: File: The VCF file with updated URI pointing to the specified directory. Raises: IncorrectFileFormatError: If the provided file is not in .vcf or .vcf.gz format. IncompatibleGenomeAssemblyError: If the genome assembly of the VCF file is not compatible. Note: This function searches for a VCF file within the provided list of files, validates its format, and checks if the genome assembly is compatible. If the conditions are met, it updates the URI of the VCF file to the specified directory and returns the modified file object. \"\"\" compatible_genome_assembly = [ \"GRCh37\" , \"hg19\" , \"GRCh38\" , \"hg38\" ] vcf_data = [ file for file in self . files () if file . file_attributes [ \"fileFormat\" ] == \"vcf\" ][ 0 ] if not Path ( vcf_data . uri ) . name . endswith ( \".vcf\" ) and not Path ( vcf_data . uri ) . name . endswith ( \".vcf.gz\" ): raise IncorrectFileFormatError ( Path ( vcf_data . uri ), \".vcf or .vcf.gz file\" ) if vcf_data . 
file_attributes [ \"genomeAssembly\" ] not in compatible_genome_assembly : raise IncompatibleGenomeAssemblyError ( vcf_data . file_attributes [ \"genomeAssembly\" ], phenopacket_path ) vcf_data . uri = str ( vcf_dir . joinpath ( Path ( vcf_data . uri ) . name )) return vcf_data","title":"vcf_file_data"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.ProbandCausativeGene","text":"Represents a causative gene associated with a proband Parameters: Name Type Description Default gene_symbol str Symbol representing the gene required gene_identifier str The ENSEMBL gene identifier for the result entry required Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. Source code in src/pheval/utils/phenopacket_utils.py 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 @dataclass class ProbandCausativeGene : \"\"\" Represents a causative gene associated with a proband Args: gene_symbol (str): Symbol representing the gene gene_identifier (str): The ENSEMBL gene identifier for the result entry Notes: While we recommend providing the gene identifier in the ENSEMBL namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. 
\"\"\" gene_symbol : str gene_identifier : str","title":"ProbandCausativeGene"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.ProbandCausativeVariant","text":"Represents a causative variant associated with a proband Parameters: Name Type Description Default proband_id str ID of the proband required assembly str Genome assembly required variant GenomicVariant Genomic variant associated with the proband required genotype str Genotype information for the variant required info str Additional information about the variant (default is an empty string) '' Source code in src/pheval/utils/phenopacket_utils.py 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 @dataclass class ProbandCausativeVariant : \"\"\" Represents a causative variant associated with a proband Args: proband_id (str): ID of the proband assembly (str): Genome assembly variant (GenomicVariant): Genomic variant associated with the proband genotype (str): Genotype information for the variant info (str, optional): Additional information about the variant (default is an empty string) \"\"\" proband_id : str assembly : str variant : GenomicVariant genotype : str info : str = \"\"","title":"ProbandCausativeVariant"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.ProbandDisease","text":"Represents a disease associated with a proband Parameters: Name Type Description Default disease_name str Name of the disease required disease_identifier str Identifier for the disease result entry in the OMIM namespace required Notes While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. 
Source code in src/pheval/utils/phenopacket_utils.py 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 @dataclass ( frozen = True , eq = True ) class ProbandDisease : \"\"\" Represents a disease associated with a proband Args: disease_name (str): Name of the disease disease_identifier (str): Identifier for the disease result entry in the OMIM namespace Notes: While we recommend providing the disease identifier in the OMIM namespace, any matching format used in Phenopacket interpretations and result output is acceptable for result matching purposes in the analysis. \"\"\" disease_name : str disease_identifier : str","title":"ProbandDisease"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.create_gene_identifier_map","text":"Create a mapping of gene identifiers to gene symbols using HGNC data. Returns: Name Type Description dict dict A mapping of gene identifiers to gene symbols. Notes The dictionary structure: { 'identifier': 'gene_symbol', ... } Source code in src/pheval/utils/phenopacket_utils.py 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 def create_gene_identifier_map () -> dict : \"\"\" Create a mapping of gene identifiers to gene symbols using HGNC data. Returns: dict: A mapping of gene identifiers to gene symbols. Notes: The dictionary structure: { 'identifier': 'gene_symbol', ... } \"\"\" hgnc_df = read_hgnc_data () identifier_map = {} for _index , row in hgnc_df . 
iterrows (): identifier_map [ row [ \"ensembl_gene_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"hgnc_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"entrez_id\" ]] = row [ \"symbol\" ] identifier_map [ row [ \"refseq_accession\" ]] = row [ \"symbol\" ] return identifier_map","title":"create_gene_identifier_map"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.create_hgnc_dict","text":"Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data. Returns: Name Type Description defaultdict defaultdict A dictionary containing gene symbols as keys and their associated gene information. Notes The dictionary structure: { 'gene_symbol': { 'ensembl_id': str, 'hgnc_id': str, 'entrez_id': str, 'refseq_accession': str, 'previous_symbol': [str, ...] }, ... } Source code in src/pheval/utils/phenopacket_utils.py 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 def create_hgnc_dict () -> defaultdict : \"\"\" Create a dictionary as a reference for updating gene symbols and identifiers based on HGNC data. Returns: defaultdict: A dictionary containing gene symbols as keys and their associated gene information. Notes: The dictionary structure: { 'gene_symbol': { 'ensembl_id': str, 'hgnc_id': str, 'entrez_id': str, 'refseq_accession': str, 'previous_symbol': [str, ...] }, ... } \"\"\" hgnc_df = read_hgnc_data () hgnc_data = defaultdict ( dict ) for _index , row in hgnc_df . iterrows (): previous_names = [] hgnc_data [ row [ \"symbol\" ]][ \"ensembl_id\" ] = row [ \"ensembl_gene_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"hgnc_id\" ] = row [ \"hgnc_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"entrez_id\" ] = row [ \"entrez_id\" ] hgnc_data [ row [ \"symbol\" ]][ \"refseq_accession\" ] = row [ \"refseq_accession\" ] previous = str ( row [ \"prev_symbol\" ]) . split ( \"|\" ) for p in previous : previous_names . 
append ( p . strip ( '\"' )) hgnc_data [ row [ \"symbol\" ]][ \"previous_symbol\" ] = previous_names return hgnc_data","title":"create_hgnc_dict"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.create_json_message","text":"Create a JSON message for writing to a file. Args: - phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON. Returns: - str: A JSON-formatted string representation of the Phenopacket or Family object. Source code in src/pheval/utils/phenopacket_utils.py 608 609 610 611 612 613 614 615 616 617 618 def create_json_message ( phenopacket : Union [ Phenopacket , Family ]) -> str : \"\"\" Create a JSON message for writing to a file. Args: - phenopacket (Union[Phenopacket, Family]): The Phenopacket or Family object to convert to JSON. Returns: - str: A JSON-formatted string representation of the Phenopacket or Family object. \"\"\" return MessageToJson ( phenopacket )","title":"create_json_message"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.phenopacket_reader","text":"Read a Phenopacket file and returns its contents as a Phenopacket or Family object Parameters: Name Type Description Default file Path Path to the Phenopacket file required Returns: Type Description Union [ Phenopacket , Family ] Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object Source code in src/pheval/utils/phenopacket_utils.py 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 def phenopacket_reader ( file : Path ) -> Union [ Phenopacket , Family ]: \"\"\" Read a Phenopacket file and returns its contents as a Phenopacket or Family object Args: file (Path): Path to the Phenopacket file Returns: Union[Phenopacket, Family]: Contents of the Phenopacket file as a Phenopacket or Family object \"\"\" file = open ( file , \"r\" ) phenopacket = json . load ( file ) file . 
close () if \"proband\" in phenopacket : return Parse ( json . dumps ( phenopacket ), Family ()) else : return Parse ( json . dumps ( phenopacket ), Phenopacket ())","title":"phenopacket_reader"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.read_hgnc_data","text":"Read HGNC data from a file and return it as a Pandas DataFrame. Returns: Type Description DataFrame pd.DataFrame: DataFrame containing the HGNC data. Source code in src/pheval/utils/phenopacket_utils.py 125 126 127 128 129 130 131 132 133 134 135 136 def read_hgnc_data () -> pd . DataFrame : \"\"\" Read HGNC data from a file and return it as a Pandas DataFrame. Returns: pd.DataFrame: DataFrame containing the HGNC data. \"\"\" return pd . read_csv ( os . path . dirname ( __file__ ) . replace ( \"utils\" , \"resources/hgnc_complete_set.txt\" ), delimiter = \" \\t \" , dtype = str , )","title":"read_hgnc_data"},{"location":"api/pheval/utils/phenopacket_utils/#src.pheval.utils.phenopacket_utils.write_phenopacket","text":"Write a Phenopacket or Family object to a file in JSON format. Parameters: Name Type Description Default phenopacket Phenopacket or Family The Phenopacket or Family object to be written. required output_file Path The Path object representing the file to write the Phenopacket data. required Returns: Type Description None None Source code in src/pheval/utils/phenopacket_utils.py 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 def write_phenopacket ( phenopacket : Union [ Phenopacket , Family ], output_file : Path ) -> None : \"\"\" Write a Phenopacket or Family object to a file in JSON format. Args: phenopacket (Phenopacket or Family): The Phenopacket or Family object to be written. output_file (Path): The Path object representing the file to write the Phenopacket data. Returns: None \"\"\" phenopacket_json = create_json_message ( phenopacket ) with open ( output_file , \"w\" ) as outfile : outfile . write ( phenopacket_json ) outfile . 
close ()","title":"write_phenopacket"},{"location":"api/pheval/utils/semsim_utils/","text":"Contains all pheval utility methods diff_semsim ( semsim_left , semsim_right , score_column , absolute_diff ) Calculates score difference between two semantic similarity profiles Parameters: Name Type Description Default semsim_left DataFrame first semantic similarity dataframe required semsim_right DataFrame second semantic similarity dataframe required score_column str Score column that will be computed (e.g. jaccard_similarity) required absolute_diff bool Whether the difference is absolute (True) or percentage (False). required Returns: Type Description DataFrame pd.DataFrame: A dataframe with terms and its scores differences Source code in src/pheval/utils/semsim_utils.py 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 def diff_semsim ( semsim_left : pd . DataFrame , semsim_right : pd . DataFrame , score_column : str , absolute_diff : bool ) -> pd . DataFrame : \"\"\"Calculates score difference between two semantic similarity profiles Args: semsim_left (pd.DataFrame): first semantic similarity dataframe semsim_right (pd.DataFrame): second semantic similarity dataframe score_column (str): Score column that will be computed (e.g. jaccard_similarity) absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False). Defaults to True. Returns: pd.DataFrame: A dataframe with terms and its scores differences \"\"\" df = pd . merge ( semsim_left , semsim_right , on = [ \"subject_id\" , \"object_id\" ], how = \"outer\" ) if absolute_diff : df [ \"diff\" ] = df [ f \" { score_column } _x\" ] - df [ f \" { score_column } _y\" ] return df [[ \"subject_id\" , \"object_id\" , \"diff\" ]] df [ \"diff\" ] = df . 
apply ( lambda row : get_percentage_diff ( row [ f \" { score_column } _x\" ], row [ f \" { score_column } _y\" ]), axis = 1 ) return df [[ \"subject_id\" , \"object_id\" , f \" { score_column } _x\" , f \" { score_column } _y\" , \"diff\" ]] filter_non_0_score ( data , col ) Removes rows that have value equal to 0 based on the given column passed by col parameter Parameters: Name Type Description Default data DataFrame Dirty dataframe required col str Column to be filtered required Returns: Type Description DataFrame pd.DataFrame: Filtered dataframe Source code in src/pheval/utils/semsim_utils.py 14 15 16 17 18 19 20 21 22 23 24 def filter_non_0_score ( data : pd . DataFrame , col : str ) -> pd . DataFrame : \"\"\"Removes rows that have value equal to 0 based on the given column passed by col parameter Args: data (pd.DataFrame): Dirty dataframe col (str): Column to be filtered Returns: pd.DataFrame: Filtered dataframe \"\"\" return data [ data [ col ] != 0 ] get_percentage_diff ( current_number , previous_number ) Gets the percentage difference between two numbers Parameters: Name Type Description Default current_number float second number in comparison required previous_number float first number in comparison required Returns: Name Type Description float float percentage difference between two numbers Source code in src/pheval/utils/semsim_utils.py 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 def get_percentage_diff ( current_number : float , previous_number : float ) -> float : \"\"\"Gets the percentage difference between two numbers Args: current_number (float): second number in comparison previous_number (float): first number in comparison Returns: float: percentage difference between two numbers \"\"\" try : if current_number == previous_number : return \" {:.2%} \" . 
format ( 0 ) if current_number > previous_number : number = ( 1 - (( current_number / previous_number ))) * 100 else : number = ( 100 - (( previous_number / current_number ) * 100 )) * - 1 return \" {:.2%} \" . format ( number / 100 ) except ZeroDivisionError : return None parse_semsim ( df , cols ) Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones Parameters: Name Type Description Default df DataFrame semantic similarity profile dataframe required cols list list of columns that will be selected on semsim data required Returns: Type Description DataFrame pd.Dataframe: parsed semantic similarity dataframe Source code in src/pheval/utils/semsim_utils.py 27 28 29 30 31 32 33 34 35 36 37 38 39 def parse_semsim ( df : pd . DataFrame , cols : list ) -> pd . DataFrame : \"\"\"Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones Args: df (pd.DataFrame): semantic similarity profile dataframe cols (list): list of columns that will be selected on semsim data Returns: pd.Dataframe: parsed semantic similarity dataframe \"\"\" df [ cols [ - 1 ]] = pd . to_numeric ( df [ cols [ - 1 ]], errors = \"coerce\" ) df . replace ( \"None\" , numpy . nan ) . dropna ( subset = cols [ - 1 ], inplace = True ) return df percentage_diff ( semsim_left , semsim_right , score_column , output ) Compares two semantic similarity profiles Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. 
jaccard_similarity) required output Path Output path for the difference tsv file required Source code in src/pheval/utils/semsim_utils.py 67 68 69 70 71 72 73 74 75 76 77 def percentage_diff ( semsim_left : Path , semsim_right : Path , score_column : str , output : Path ): \"\"\"Compares two semantic similarity profiles Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. jaccard_similarity) output (Path): Output path for the difference tsv file \"\"\" clean_df = semsim_analysis ( semsim_left , semsim_right , score_column , absolute_diff = False ) clean_df . sort_values ( by = \"diff\" , ascending = False ) . to_csv ( output , sep = \" \\t \" , index = False ) semsim_analysis ( semsim_left , semsim_right , score_column , absolute_diff = True ) semsim_analysis Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. jaccard_similarity) required absolute_diff bool Whether the difference is absolute (True) or percentage (False). True Returns: Type Description DataFrame [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles Source code in src/pheval/utils/semsim_utils.py 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 def semsim_analysis ( semsim_left : Path , semsim_right : Path , score_column : str , absolute_diff = True ) -> pd . DataFrame : \"\"\"semsim_analysis Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. 
jaccard_similarity) absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False). Defaults to True. Returns: [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles \"\"\" validate_semsim_file_comparison ( semsim_left , semsim_right ) cols = [ \"subject_id\" , \"object_id\" , score_column ] semsim_left = pd . read_csv ( semsim_left , sep = \" \\t \" ) semsim_right = pd . read_csv ( semsim_right , sep = \" \\t \" ) file_utils . ensure_columns_exists ( cols = cols , err_message = \"must exist in semsim dataframes\" , dataframes = [ semsim_left , semsim_right ], ) semsim_left = parse_semsim ( semsim_left , cols ) semsim_right = parse_semsim ( semsim_right , cols ) diff_df = diff_semsim ( semsim_left , semsim_right , score_column , absolute_diff ) return filter_non_0_score ( diff_df , \"diff\" ) semsim_heatmap_plot ( semsim_left , semsim_right , score_column ) Plots semantic similarity profiles heatmap Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. jaccard_similarity) required Source code in src/pheval/utils/semsim_utils.py 80 81 82 83 84 85 86 87 88 89 90 91 def semsim_heatmap_plot ( semsim_left : Path , semsim_right : Path , score_column : str ): \"\"\"Plots semantic similarity profiles heatmap Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. jaccard_similarity) \"\"\" clean_df = semsim_analysis ( semsim_left , semsim_right , score_column ) df = clean_df . pivot ( index = \"subject_id\" , columns = \"object_id\" , values = \"diff\" ) fig = px . imshow ( df , text_auto = True ) fig . 
show () validate_semsim_file_comparison ( semsim_left , semsim_right ) Checks if files exist and whether they're different Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile Raises: Exception: FileNotFoundException Source code in src/pheval/utils/semsim_utils.py 124 125 126 127 128 129 130 131 132 133 134 135 def validate_semsim_file_comparison ( semsim_left : Path , semsim_right : Path ): \"\"\"Checks if files exist and whether they're different Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile Raises: Exception: FileNotFoundException \"\"\" if semsim_left == semsim_right : errmsg = \"Semantic similarity profiles are equal. Make sure you have selected different files to analyze\" raise Exception ( errmsg ) file_utils . ensure_file_exists ( semsim_left , semsim_right )","title":"Semsim utils"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.diff_semsim","text":"Calculates score difference between two semantic similarity profiles Parameters: Name Type Description Default semsim_left DataFrame first semantic similarity dataframe required semsim_right DataFrame second semantic similarity dataframe required score_column str Score column that will be computed (e.g. jaccard_similarity) required absolute_diff bool Whether the difference is absolute (True) or percentage (False). required Returns: Type Description DataFrame pd.DataFrame: A dataframe with terms and its scores differences Source code in src/pheval/utils/semsim_utils.py 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 def diff_semsim ( semsim_left : pd . DataFrame , semsim_right : pd . DataFrame , score_column : str , absolute_diff : bool ) -> pd . 
DataFrame : \"\"\"Calculates score difference between two semantic similarity profiles Args: semsim_left (pd.DataFrame): first semantic similarity dataframe semsim_right (pd.DataFrame): second semantic similarity dataframe score_column (str): Score column that will be computed (e.g. jaccard_similarity) absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False). Defaults to True. Returns: pd.DataFrame: A dataframe with terms and its scores differences \"\"\" df = pd . merge ( semsim_left , semsim_right , on = [ \"subject_id\" , \"object_id\" ], how = \"outer\" ) if absolute_diff : df [ \"diff\" ] = df [ f \" { score_column } _x\" ] - df [ f \" { score_column } _y\" ] return df [[ \"subject_id\" , \"object_id\" , \"diff\" ]] df [ \"diff\" ] = df . apply ( lambda row : get_percentage_diff ( row [ f \" { score_column } _x\" ], row [ f \" { score_column } _y\" ]), axis = 1 ) return df [[ \"subject_id\" , \"object_id\" , f \" { score_column } _x\" , f \" { score_column } _y\" , \"diff\" ]]","title":"diff_semsim"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.filter_non_0_score","text":"Removes rows that have value equal to 0 based on the given column passed by col parameter Parameters: Name Type Description Default data DataFrame Dirty dataframe required col str Column to be filtered required Returns: Type Description DataFrame pd.DataFrame: Filtered dataframe Source code in src/pheval/utils/semsim_utils.py 14 15 16 17 18 19 20 21 22 23 24 def filter_non_0_score ( data : pd . DataFrame , col : str ) -> pd . 
DataFrame : \"\"\"Removes rows that have value equal to 0 based on the given column passed by col parameter Args: data (pd.DataFrame): Dirty dataframe col (str): Column to be filtered Returns: pd.DataFrame: Filtered dataframe \"\"\" return data [ data [ col ] != 0 ]","title":"filter_non_0_score"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.get_percentage_diff","text":"Gets the percentage difference between two numbers Parameters: Name Type Description Default current_number float second number in comparison required previous_number float first number in comparison required Returns: Name Type Description float float percentage difference between two numbers Source code in src/pheval/utils/semsim_utils.py 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 def get_percentage_diff ( current_number : float , previous_number : float ) -> float : \"\"\"Gets the percentage difference between two numbers Args: current_number (float): second number in comparison previous_number (float): first number in comparison Returns: float: percentage difference between two numbers \"\"\" try : if current_number == previous_number : return \" {:.2%} \" . format ( 0 ) if current_number > previous_number : number = ( 1 - (( current_number / previous_number ))) * 100 else : number = ( 100 - (( previous_number / current_number ) * 100 )) * - 1 return \" {:.2%} \" . 
format ( number / 100 ) except ZeroDivisionError : return None","title":"get_percentage_diff"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.parse_semsim","text":"Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones Parameters: Name Type Description Default df DataFrame semantic similarity profile dataframe required cols list list of columns that will be selected on semsim data required Returns: Type Description DataFrame pd.Dataframe: parsed semantic similarity dataframe Source code in src/pheval/utils/semsim_utils.py 27 28 29 30 31 32 33 34 35 36 37 38 39 def parse_semsim ( df : pd . DataFrame , cols : list ) -> pd . DataFrame : \"\"\"Parses semantic similarity profiles converting the score column as a numeric value and dropping the null ones Args: df (pd.DataFrame): semantic similarity profile dataframe cols (list): list of columns that will be selected on semsim data Returns: pd.Dataframe: parsed semantic similarity dataframe \"\"\" df [ cols [ - 1 ]] = pd . to_numeric ( df [ cols [ - 1 ]], errors = \"coerce\" ) df . replace ( \"None\" , numpy . nan ) . dropna ( subset = cols [ - 1 ], inplace = True ) return df","title":"parse_semsim"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.percentage_diff","text":"Compares two semantic similarity profiles Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. 
jaccard_similarity) required output Path Output path for the difference tsv file required Source code in src/pheval/utils/semsim_utils.py 67 68 69 70 71 72 73 74 75 76 77 def percentage_diff ( semsim_left : Path , semsim_right : Path , score_column : str , output : Path ): \"\"\"Compares two semantic similarity profiles Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. jaccard_similarity) output (Path): Output path for the difference tsv file \"\"\" clean_df = semsim_analysis ( semsim_left , semsim_right , score_column , absolute_diff = False ) clean_df . sort_values ( by = \"diff\" , ascending = False ) . to_csv ( output , sep = \" \\t \" , index = False )","title":"percentage_diff"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.semsim_analysis","text":"semsim_analysis Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. jaccard_similarity) required absolute_diff bool Whether the difference is absolute (True) or percentage (False). True Returns: Type Description DataFrame [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles Source code in src/pheval/utils/semsim_utils.py 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 def semsim_analysis ( semsim_left : Path , semsim_right : Path , score_column : str , absolute_diff = True ) -> pd . DataFrame : \"\"\"semsim_analysis Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. 
jaccard_similarity) absolute_diff (bool, optional): Whether the difference is absolute (True) or percentage (False). Defaults to True. Returns: [pd.DataFrame]: DataFrame with the differences between two semantic similarity profiles \"\"\" validate_semsim_file_comparison ( semsim_left , semsim_right ) cols = [ \"subject_id\" , \"object_id\" , score_column ] semsim_left = pd . read_csv ( semsim_left , sep = \" \\t \" ) semsim_right = pd . read_csv ( semsim_right , sep = \" \\t \" ) file_utils . ensure_columns_exists ( cols = cols , err_message = \"must exist in semsim dataframes\" , dataframes = [ semsim_left , semsim_right ], ) semsim_left = parse_semsim ( semsim_left , cols ) semsim_right = parse_semsim ( semsim_right , cols ) diff_df = diff_semsim ( semsim_left , semsim_right , score_column , absolute_diff ) return filter_non_0_score ( diff_df , \"diff\" )","title":"semsim_analysis"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.semsim_heatmap_plot","text":"Plots semantic similarity profiles heatmap Parameters: Name Type Description Default semsim_left Path File path of the first semantic similarity profile required semsim_right Path File path of the second semantic similarity profile required score_column str Score column that will be computed (e.g. jaccard_similarity) required Source code in src/pheval/utils/semsim_utils.py 80 81 82 83 84 85 86 87 88 89 90 91 def semsim_heatmap_plot ( semsim_left : Path , semsim_right : Path , score_column : str ): \"\"\"Plots semantic similarity profiles heatmap Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile score_column (str): Score column that will be computed (e.g. jaccard_similarity) \"\"\" clean_df = semsim_analysis ( semsim_left , semsim_right , score_column ) df = clean_df . pivot ( index = \"subject_id\" , columns = \"object_id\" , values = \"diff\" ) fig = px . 
imshow ( df , text_auto = True ) fig . show ()","title":"semsim_heatmap_plot"},{"location":"api/pheval/utils/semsim_utils/#src.pheval.utils.semsim_utils.validate_semsim_file_comparison","text":"Checks if files exist and whether they're different Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile Raises: Exception: FileNotFoundException Source code in src/pheval/utils/semsim_utils.py 124 125 126 127 128 129 130 131 132 133 134 135 def validate_semsim_file_comparison ( semsim_left : Path , semsim_right : Path ): \"\"\"Checks if files exist and whether they're different Args: semsim_left (Path): File path of the first semantic similarity profile semsim_right (Path): File path of the second semantic similarity profile Raises: Exception: FileNotFoundException \"\"\" if semsim_left == semsim_right : errmsg = \"Semantic similarity profiles are equal. Make sure you have selected different files to analyze\" raise Exception ( errmsg ) file_utils . ensure_file_exists ( semsim_left , semsim_right )","title":"validate_semsim_file_comparison"},{"location":"api/pheval/utils/utils/","text":"Contains all pheval utility methods rand ( df , min_num , max_num , scramble_factor ) Numeric scrambling Args: df (pd.DataFrame): dataframe records min_num (int): min value from this records max_num (int): max value from this records scramble_factor (float): scramble factor scalar Returns: float: randomized number Source code in src/pheval/utils/utils.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 def rand ( df : pd . DataFrame , min_num : int , max_num : int , scramble_factor : float ) -> float : \"\"\" Numeric scrambling Args: df (pd.DataFrame): dataframe records min_num (int): min value from this records max_num (int): max value from this records scramble_factor (float): scramble factor scalar Returns: float: randomized number \"\"\" try : return df + ( random . 
uniform ( min_num , max_num ) * scramble_factor ) except TypeError as err : info_log . error ( df , exc_info = err ) return df semsim_scramble ( input , output , columns_to_be_scrambled , scramble_factor = 0.5 ) Scrambles semantic similarity profile with a magnitude between 0 and 1 (scramble_factor: 0 means no scrambling and 1 means complete randomisation). It then randomises the above scores with a degree of the scramble_factor and returns a scrambles pandas dataframe. Args: input (Path): scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): columns that will be scrambled in semsim file (e.g. jaccard_similarity). output (Path) Returns: pd.Dataframe: scrambled dataframe Source code in src/pheval/utils/utils.py 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 def semsim_scramble ( input : Path , output : Path , columns_to_be_scrambled : List [ str ], scramble_factor : float = 0.5 , ) -> pd . DataFrame : \"\"\" Scrambles semantic similarity profile with a magnitude between 0 and 1 (scramble_factor: 0 means no scrambling and 1 means complete randomisation). It then randomises the above scores with a degree of the scramble_factor and returns a scrambles pandas dataframe. Args: input (Path): scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): columns that will be scrambled in semsim file (e.g. jaccard_similarity). output (Path) Returns: pd.Dataframe: scrambled dataframe \"\"\" semsim = pd . read_csv ( input , sep = \" \\t \" ) dataframe = semsim_scramble_df ( semsim , columns_to_be_scrambled , scramble_factor ) dataframe . 
to_csv ( output , sep = \" \\t \" , index = False ) semsim_scramble_df ( dataframe , columns_to_be_scrambled , scramble_factor ) scramble_semsim_df Args: dataframe (pd.DataFrame): dataframe that contains semsim profile scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): Returns: pd.Dataframe: scrambled dataframe Source code in src/pheval/utils/utils.py 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 def semsim_scramble_df ( dataframe : pd . DataFrame , columns_to_be_scrambled : List [ str ], scramble_factor : float , ) -> pd . DataFrame : \"\"\"scramble_semsim_df Args: dataframe (pd.DataFrame): dataframe that contains semsim profile scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): Returns: pd.Dataframe: scrambled dataframe \"\"\" for col in columns_to_be_scrambled : min_num = dataframe [ col ] . min () max_num = dataframe [ col ] . max () dataframe [ col ] = dataframe [ col ] . apply ( rand , args = ( min_num , max_num , scramble_factor )) return dataframe","title":"Utils"},{"location":"api/pheval/utils/utils/#src.pheval.utils.utils.rand","text":"Numeric scrambling Args: df (pd.DataFrame): dataframe records min_num (int): min value from this records max_num (int): max value from this records scramble_factor (float): scramble factor scalar Returns: float: randomized number Source code in src/pheval/utils/utils.py 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 def rand ( df : pd . DataFrame , min_num : int , max_num : int , scramble_factor : float ) -> float : \"\"\" Numeric scrambling Args: df (pd.DataFrame): dataframe records min_num (int): min value from this records max_num (int): max value from this records scramble_factor (float): scramble factor scalar Returns: float: randomized number \"\"\" try : return df + ( random . uniform ( min_num , max_num ) * scramble_factor ) except TypeError as err : info_log . 
error ( df , exc_info = err ) return df","title":"rand"},{"location":"api/pheval/utils/utils/#src.pheval.utils.utils.semsim_scramble","text":"Scrambles semantic similarity profile with a magnitude between 0 and 1 (scramble_factor: 0 means no scrambling and 1 means complete randomisation). It then randomises the above scores with a degree of the scramble_factor and returns a scrambles pandas dataframe. Args: input (Path): scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): columns that will be scrambled in semsim file (e.g. jaccard_similarity). output (Path) Returns: pd.Dataframe: scrambled dataframe Source code in src/pheval/utils/utils.py 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 def semsim_scramble ( input : Path , output : Path , columns_to_be_scrambled : List [ str ], scramble_factor : float = 0.5 , ) -> pd . DataFrame : \"\"\" Scrambles semantic similarity profile with a magnitude between 0 and 1 (scramble_factor: 0 means no scrambling and 1 means complete randomisation). It then randomises the above scores with a degree of the scramble_factor and returns a scrambles pandas dataframe. Args: input (Path): scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): columns that will be scrambled in semsim file (e.g. jaccard_similarity). output (Path) Returns: pd.Dataframe: scrambled dataframe \"\"\" semsim = pd . read_csv ( input , sep = \" \\t \" ) dataframe = semsim_scramble_df ( semsim , columns_to_be_scrambled , scramble_factor ) dataframe . 
to_csv ( output , sep = \" \\t \" , index = False )","title":"semsim_scramble"},{"location":"api/pheval/utils/utils/#src.pheval.utils.utils.semsim_scramble_df","text":"scramble_semsim_df Args: dataframe (pd.DataFrame): dataframe that contains semsim profile scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): Returns: pd.Dataframe: scrambled dataframe Source code in src/pheval/utils/utils.py 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 def semsim_scramble_df ( dataframe : pd . DataFrame , columns_to_be_scrambled : List [ str ], scramble_factor : float , ) -> pd . DataFrame : \"\"\"scramble_semsim_df Args: dataframe (pd.DataFrame): dataframe that contains semsim profile scramble_factor (float) scalar scramble factor columns_to_be_scrambled (List[str]): Returns: pd.Dataframe: scrambled dataframe \"\"\" for col in columns_to_be_scrambled : min_num = dataframe [ col ] . min () max_num = dataframe [ col ] . max () dataframe [ col ] = dataframe [ col ] . apply ( rand , args = ( min_num , max_num , scramble_factor )) return dataframe","title":"semsim_scramble_df"}]}
\ No newline at end of file
diff --git a/sitemap.xml b/sitemap.xml
index 89e5fefa0..2539f44ad 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -2,217 +2,174 @@
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
     <url>
          <loc>https://monarch-initiative.github.io/pheval/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/CODE_OF_CONDUCT/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/about/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/contact/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/contributing/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/developing_a_pheval_plugin/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/exomiser_pipeline/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/pipeline/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/plugins/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/roadmap/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/styleguide/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/cli/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/config_parser/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/constants/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/run_metadata/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/analysis/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/benchmark_generator/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/benchmarking_data/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/binary_classification_stats/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/disease_prioritisation_analysis/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/gene_prioritisation_analysis/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/generate_plots/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/generate_summary_outputs/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/parse_benchmark_summary/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/parse_pheval_result/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/prioritisation_rank_recorder/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/prioritisation_result_types/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/rank_stats/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/run_data_parser/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/analyse/variant_prioritisation_analysis/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/infra/exomiserdb/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/post_processing/post_processing/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/prepare/create_noisy_phenopackets/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/prepare/create_spiked_vcf/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/prepare/custom_exceptions/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/prepare/prepare_corpus/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/prepare/update_phenopacket/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/runners/runner/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/utils/exomiser/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/utils/file_utils/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/utils/phenopacket_utils/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/utils/semsim_utils/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
     <url>
          <loc>https://monarch-initiative.github.io/pheval/api/pheval/utils/utils/</loc>
-         <lastmod>2024-07-04</lastmod>
-         <changefreq>daily</changefreq>
+         <lastmod>2024-09-05</lastmod>
     </url>
 </urlset>
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index bb315dd3b..2fce9e018 100644
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ
diff --git a/styleguide/index.html b/styleguide/index.html
index 6d2e6fd5a..d17311807 100644
--- a/styleguide/index.html
+++ b/styleguide/index.html
@@ -11,7 +11,7 @@
         <link rel="canonical" href="https://monarch-initiative.github.io/pheval/styleguide/">
       
       <link rel="icon" href="../assets/images/favicon.png">
-      <meta name="generator" content="mkdocs-1.4.2, mkdocs-material-8.5.10">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-8.5.10">