Skip to content

Commit

Permalink
Remove chi2 thresholds for analysis & reference (#349)
Browse files Browse the repository at this point in the history
Chi2 thresholding is based on p-values, which means the calculated
thresholds are irrelevant. Previously, thresholds were removed as part of
alerting. Because alerting is only applied to analysis data, the threshold
value was still available on reference data, which led to confusion.

This commit changes that so the chi2 threshold values are removed across
the entire dataset.
  • Loading branch information
michael-nml authored Jan 17, 2024
1 parent fa38d24 commit aad7edf
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions nannyml/drift/univariate/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,14 @@ def __init__(self, **kwargs) -> None:
self._p_value: float
self._fitted = False

# Fit the method on reference data, then clear both threshold values.
#
# Delegates to the parent class's ``fit`` with the same ``reference_data`` and
# ``timestamps`` arguments, sets ``lower_threshold_value`` and
# ``upper_threshold_value`` to None, and returns ``self``.
# NOTE(review): leading indentation was lost in this diff rendering; these lines
# are a method body of a class whose header is not visible here.
def fit(self, reference_data: pd.Series, timestamps: Optional[pd.Series] = None) -> Self:
super().fit(reference_data, timestamps)

# Thresholding is based on p-values: ignore all custom thresholding and disable plotting a threshold.
self.lower_threshold_value = None
self.upper_threshold_value = None
return self

def _fit(self, reference_data: pd.Series, timestamps: Optional[pd.Series] = None) -> Self:
reference_data = _remove_nans(reference_data)
self._reference_data_vcs = reference_data.value_counts().loc[lambda v: v != 0]
Expand All @@ -462,9 +470,6 @@ def _calculate(self, data: pd.Series):
return stat

# Report whether the stored p-value signals an alert.
#
# Returns True when ``self._p_value`` is below 0.05. The ``value`` argument is
# not read in the lines visible here.
# NOTE(review): this rendering has lost its +/- markers. The two threshold resets
# and the blank line below appear to be the 3 lines this commit deletes (the hunk
# shrinks from 9 to 6 lines) -- confirm against the raw diff.
def alert(self, value: float):
self.lower_threshold_value = None # ignoring all custom thresholding, disable plotting a threshold
self.upper_threshold_value = None # ignoring all custom thresholding, disable plotting a threshold

return self._p_value < 0.05

def _calc_chi2(self, data: pd.Series):
Expand Down

0 comments on commit aad7edf

Please sign in to comment.