Skip to content

Commit

Permalink
Merge branch 'feat/better-metric-failure-handling'
Browse files Browse the repository at this point in the history
  • Loading branch information
nnansters committed Feb 12, 2024
2 parents 59fc796 + dead3bd commit a7884ef
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 44 deletions.
32 changes: 23 additions & 9 deletions nannyml/stats/avg/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,15 +192,29 @@ def _calculate(self, data: pd.DataFrame, *args, **kwargs) -> Result:

def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[str, Any]:
    """Calculate the average-value statistics of a single column for one chunk of data.

    Parameters
    ----------
    data : pd.DataFrame
        The chunk to calculate on; ``column_name`` is looked up in it.
    column_name : str
        Name of the column to calculate the statistics for.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the keys ``value``, ``sampling_error``,
        ``upper_confidence_boundary``, ``lower_confidence_boundary``,
        ``upper_threshold``, ``lower_threshold`` and ``alert``.
        When the calculation fails with an ``Exception`` the error is logged and
        every computed entry is NaN; the thresholds are still reported.
    """
    result: Dict[str, Any] = {}
    try:
        result['value'] = _calculate_avg_value_stats(data[column_name])
        # Sampling error of the mean shrinks with the square root of the chunk size.
        result['sampling_error'] = self._sampling_error_components[column_name] / np.sqrt(data.shape[0])
        result['upper_confidence_boundary'] = result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error']
        result['lower_confidence_boundary'] = result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error']
        result['upper_threshold'] = self._upper_alert_thresholds[column_name]
        result['lower_threshold'] = self._lower_alert_thresholds[column_name]
        result['alert'] = _add_alert_flag(result)
    except Exception as exc:
        if self._logger:
            self._logger.error(
                f"an unexpected exception occurred during calculation of column '{column_name}': {exc}"
            )
        # ``np.nan`` instead of ``np.NaN``: the capitalised alias no longer exists in NumPy 2.0.
        result['value'] = np.nan
        result['sampling_error'] = np.nan
        result['upper_confidence_boundary'] = np.nan
        result['lower_confidence_boundary'] = np.nan
        result['upper_threshold'] = self._upper_alert_thresholds[column_name]
        result['lower_threshold'] = self._lower_alert_thresholds[column_name]
        result['alert'] = np.nan
    # Return after the try statement rather than from a ``finally`` clause: a ``return``
    # in ``finally`` discards any in-flight exception, which would hide KeyboardInterrupt /
    # SystemExit as well as errors raised by the fallback handler itself.
    return result


def _create_multilevel_index(
Expand Down
38 changes: 26 additions & 12 deletions nannyml/stats/median/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,18 +196,32 @@ def _calculate(self, data: pd.DataFrame, *args, **kwargs) -> Result:

def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[str, Any]:
    """Calculate the median-value statistics of a single column for one chunk of data.

    Parameters
    ----------
    data : pd.DataFrame
        The chunk to calculate on; ``column_name`` is looked up in it.
    column_name : str
        Name of the column to calculate the statistics for.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the keys ``value``, ``sampling_error``,
        ``upper_confidence_boundary``, ``lower_confidence_boundary``,
        ``upper_threshold``, ``lower_threshold`` and ``alert``.
        When the calculation fails with an ``Exception`` the error is logged and
        every computed entry is NaN; the thresholds are still reported.
    """
    result: Dict[str, Any] = {}
    try:
        result['value'] = _calculate_median_value_stats(data[column_name])
        result['sampling_error'] = summary_stats_median_sampling_error(
            self._sampling_error_components[column_name], data[column_name]
        )
        result['upper_confidence_boundary'] = result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error']
        result['lower_confidence_boundary'] = result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error']

        result['upper_threshold'] = self._upper_alert_thresholds[column_name]
        result['lower_threshold'] = self._lower_alert_thresholds[column_name]
        result['alert'] = _add_alert_flag(result)
    except Exception as exc:
        if self._logger:
            self._logger.error(
                f"an unexpected exception occurred during calculation of column '{column_name}': {exc}"
            )
        # ``np.nan`` instead of ``np.NaN``: the capitalised alias no longer exists in NumPy 2.0.
        result['value'] = np.nan
        result['sampling_error'] = np.nan
        result['upper_confidence_boundary'] = np.nan
        result['lower_confidence_boundary'] = np.nan
        result['upper_threshold'] = self._upper_alert_thresholds[column_name]
        result['lower_threshold'] = self._lower_alert_thresholds[column_name]
        result['alert'] = np.nan
    # Return after the try statement rather than from a ``finally`` clause: a ``return``
    # in ``finally`` discards any in-flight exception, which would hide KeyboardInterrupt /
    # SystemExit as well as errors raised by the fallback handler itself.
    return result


def _create_multilevel_index(
Expand Down
42 changes: 28 additions & 14 deletions nannyml/stats/std/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,21 +197,35 @@ def _calculate(self, data: pd.DataFrame, *args, **kwargs) -> Result:

def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[str, Any]:
    """Calculate the standard-deviation statistics of a single column for one chunk of data.

    Parameters
    ----------
    data : pd.DataFrame
        The chunk to calculate on; ``column_name`` is looked up in it.
    column_name : str
        Name of the column to calculate the statistics for.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the keys ``value``, ``sampling_error``,
        ``upper_confidence_boundary``, ``lower_confidence_boundary``,
        ``upper_threshold``, ``lower_threshold`` and ``alert``.
        When the calculation fails with an ``Exception`` the error is logged and
        every computed entry is NaN; the thresholds are still reported.
    """
    result: Dict[str, Any] = {}
    try:
        result['value'] = _calculate_std_value_stats(data[column_name])
        result['sampling_error'] = summary_stats_std_sampling_error(
            self._sampling_error_components[column_name], data[column_name]
        )
        result['upper_confidence_boundary'] = result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error']
        # Clip the lower boundary at the configured lower limit; no clipping when the limit is None.
        result['lower_confidence_boundary'] = np.maximum(
            result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'],
            -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit,
        )

        result['upper_threshold'] = self._upper_alert_thresholds[column_name]
        result['lower_threshold'] = self._lower_alert_thresholds[column_name]
        result['alert'] = _add_alert_flag(result)
    except Exception as exc:
        if self._logger:
            self._logger.error(
                f"an unexpected exception occurred during calculation of column '{column_name}': {exc}"
            )
        # ``np.nan`` instead of ``np.NaN``: the capitalised alias no longer exists in NumPy 2.0.
        result['value'] = np.nan
        result['sampling_error'] = np.nan
        result['upper_confidence_boundary'] = np.nan
        result['lower_confidence_boundary'] = np.nan
        result['upper_threshold'] = self._upper_alert_thresholds[column_name]
        result['lower_threshold'] = self._lower_alert_thresholds[column_name]
        result['alert'] = np.nan
    # Return after the try statement rather than from a ``finally`` clause: a ``return``
    # in ``finally`` discards any in-flight exception, which would hide KeyboardInterrupt /
    # SystemExit as well as errors raised by the fallback handler itself.
    return result


def _create_multilevel_index(
Expand Down
32 changes: 23 additions & 9 deletions nannyml/stats/sum/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,15 +191,29 @@ def _calculate(self, data: pd.DataFrame, *args, **kwargs) -> Result:

def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[str, Any]:
    """Calculate the sum statistics of a single column for one chunk of data.

    Parameters
    ----------
    data : pd.DataFrame
        The chunk to calculate on; ``column_name`` is looked up in it.
    column_name : str
        Name of the column to calculate the statistics for.

    Returns
    -------
    Dict[str, Any]
        A dictionary with the keys ``value``, ``sampling_error``,
        ``upper_confidence_boundary``, ``lower_confidence_boundary``,
        ``upper_threshold``, ``lower_threshold`` and ``alert``.
        When the calculation fails with an ``Exception`` the error is logged and
        every computed entry is NaN; the thresholds are still reported.
    """
    result: Dict[str, Any] = {}
    try:
        result['value'] = _calculate_sum_value_stats(data[column_name])
        # Sampling error of a sum grows with the square root of the chunk size.
        result['sampling_error'] = self._sampling_error_components[column_name] * np.sqrt(data.shape[0])
        result['upper_confidence_boundary'] = result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error']
        result['lower_confidence_boundary'] = result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error']
        result['upper_threshold'] = self._upper_alert_thresholds[column_name]
        result['lower_threshold'] = self._lower_alert_thresholds[column_name]
        result['alert'] = _add_alert_flag(result)
    except Exception as exc:
        if self._logger:
            self._logger.error(
                f"an unexpected exception occurred during calculation of column '{column_name}': {exc}"
            )
        # ``np.nan`` instead of ``np.NaN``: the capitalised alias no longer exists in NumPy 2.0.
        result['value'] = np.nan
        result['sampling_error'] = np.nan
        result['upper_confidence_boundary'] = np.nan
        result['lower_confidence_boundary'] = np.nan
        result['upper_threshold'] = self._upper_alert_thresholds[column_name]
        result['lower_threshold'] = self._lower_alert_thresholds[column_name]
        result['alert'] = np.nan
    # Return after the try statement rather than from a ``finally`` clause: a ``return``
    # in ``finally`` discards any in-flight exception, which would hide KeyboardInterrupt /
    # SystemExit as well as errors raised by the fallback handler itself.
    return result


def _create_multilevel_index(
Expand Down

0 comments on commit a7884ef

Please sign in to comment.