From 246ca356bc5127636001e54745eef032acf12ecb Mon Sep 17 00:00:00 2001
From: Michael Van de Steene <michael@nannyml.com>
Date: Fri, 24 May 2024 21:58:57 +0200
Subject: [PATCH] Optimize filtering univariate result for period

The `PerMetricPerColumnResult` filter function incurs a high overhead
when selecting a subset of columns or metrics. This overhead is also
incurred (and is at its highest) when filtering only by period, because
in that case all columns and metrics are selected.

This commit adds a short-circuit path to avoid the overhead when only
the period requires filtering. For a result with 50 columns and 8
metrics this results in a >100x speed-up when only filtering for period.
---
 nannyml/base.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/nannyml/base.py b/nannyml/base.py
index 9cc61326..a0ec7062 100644
--- a/nannyml/base.py
+++ b/nannyml/base.py
@@ -293,13 +293,15 @@ def _filter(
         *args,
         **kwargs,
     ) -> Self:
+        res = super()._filter(period, *args, **kwargs)
+        if metrics is None and column_names is None:
+            return res
+
         if metrics is None:
             metrics = [metric.column_name for metric in self.metrics]
         if column_names is None:
             column_names = self.column_names
 
-        res = super()._filter(period, *args, **kwargs)
-
         data = pd.concat([res.data.loc[:, (['chunk'])], res.data.loc[:, (column_names, metrics)]], axis=1)
         data = data.reset_index(drop=True)