From fcc72e7977af6a7f97c635660ba6ebdeb0d874a9 Mon Sep 17 00:00:00 2001 From: giodavoli Date: Thu, 9 Nov 2023 22:04:12 +0100 Subject: [PATCH] Fix threshold computation handling nan values (#333) * replace std and mean with nanstd and nanmean * fix test * Added test to properly check outcome --------- Co-authored-by: Giovanni Davoli Co-authored-by: Niels Nuyttens --- nannyml/thresholds.py | 6 +++--- tests/test_thresholds.py | 10 +++++++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/nannyml/thresholds.py b/nannyml/thresholds.py index 68cf1af8..e27c2786 100644 --- a/nannyml/thresholds.py +++ b/nannyml/thresholds.py @@ -153,7 +153,7 @@ def __init__( self, std_lower_multiplier: Optional[Union[float, int]] = 3, std_upper_multiplier: Optional[Union[float, int]] = 3, - offset_from: Callable[[np.ndarray], Any] = np.mean, + offset_from: Callable[[np.ndarray], Any] = np.nanmean, ): """Creates a new StandardDeviationThreshold instance. @@ -166,7 +166,7 @@ def __init__( The number the standard deviation of the input array will be multiplied with to form the upper offset. This value will be added to the aggregate of the input array. Defaults to 3. - offset_from: Callable[[np.ndarray], Any], default=np.mean + offset_from: Callable[[np.ndarray], Any], default=np.nanmean A function that will be applied to the input array to aggregate it into a single value. Adding the upper offset to this value will yield the upper threshold, subtracting the lower offset will yield the lower threshold. 
@@ -180,7 +180,7 @@ def __init__( def thresholds(self, data: np.ndarray, **kwargs) -> Tuple[Optional[float], Optional[float]]: aggregate = self.offset_from(data) - std = np.std(data) + std = np.nanstd(data) lower_threshold = aggregate - std * self.std_lower_multiplier if self.std_lower_multiplier is not None else None diff --git a/tests/test_thresholds.py b/tests/test_thresholds.py index dcd50d89..c3f429ee 100644 --- a/tests/test_thresholds.py +++ b/tests/test_thresholds.py @@ -72,7 +72,7 @@ def test_standard_deviation_threshold_init_sets_default_instance_attributes(): assert sut.std_lower_multiplier == 3 assert sut.std_upper_multiplier == 3 - assert sut.offset_from == np.mean + assert sut.offset_from == np.nanmean @pytest.mark.parametrize( @@ -153,3 +153,11 @@ def test_standard_deviation_threshold_raises_threshold_exception_when_negative_l def test_standard_deviation_threshold_raises_threshold_exception_when_negative_upper_multiplier_given(): with pytest.raises(ThresholdException, match="'std_upper_multiplier' should be greater than 0 but got value -1"): _ = StandardDeviationThreshold(0, -1) + + +def test_standard_deviation_threshold_deals_with_nan_values(): + t = StandardDeviationThreshold() + lower, upper = t.thresholds(np.asarray([-1, 1, np.nan, 1, np.nan])) + + assert not np.isnan(upper) + assert not np.isnan(lower)